File: | lib/Transforms/InstCombine/InstCombineAddSub.cpp |
Warning: | line 209, column 33 The expression is an uninitialized value. The computed value will also be garbage |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This file implements the visit functions for add, fadd, sub, and fsub. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "InstCombineInternal.h" | |||
15 | #include "llvm/ADT/APFloat.h" | |||
16 | #include "llvm/ADT/APInt.h" | |||
17 | #include "llvm/ADT/STLExtras.h" | |||
18 | #include "llvm/ADT/SmallVector.h" | |||
19 | #include "llvm/Analysis/InstructionSimplify.h" | |||
20 | #include "llvm/Analysis/ValueTracking.h" | |||
21 | #include "llvm/IR/Constant.h" | |||
22 | #include "llvm/IR/Constants.h" | |||
23 | #include "llvm/IR/InstrTypes.h" | |||
24 | #include "llvm/IR/Instruction.h" | |||
25 | #include "llvm/IR/Instructions.h" | |||
26 | #include "llvm/IR/Operator.h" | |||
27 | #include "llvm/IR/PatternMatch.h" | |||
28 | #include "llvm/IR/Type.h" | |||
29 | #include "llvm/IR/Value.h" | |||
30 | #include "llvm/Support/AlignOf.h" | |||
31 | #include "llvm/Support/Casting.h" | |||
32 | #include "llvm/Support/KnownBits.h" | |||
33 | #include <cassert> | |||
34 | #include <utility> | |||
35 | ||||
36 | using namespace llvm; | |||
37 | using namespace PatternMatch; | |||
38 | ||||
39 | #define DEBUG_TYPE"instcombine" "instcombine" | |||
40 | ||||
41 | namespace { | |||
42 | ||||
43 | /// Class representing coefficient of floating-point addend. | |||
44 | /// This class needs to be highly efficient, which is especially true for | |||
45 | /// the constructor. As of I write this comment, the cost of the default | |||
46 | /// constructor is merely 4-byte-store-zero (Assuming compiler is able to | |||
47 | /// perform write-merging). | |||
48 | /// | |||
49 | class FAddendCoef { | |||
50 | public: | |||
51 | // The constructor has to initialize a APFloat, which is unnecessary for | |||
52 | // most addends which have coefficient either 1 or -1. So, the constructor | |||
53 | // is expensive. In order to avoid the cost of the constructor, we should | |||
54 | // reuse some instances whenever possible. The pre-created instances | |||
55 | // FAddCombine::Add[0-5] embodies this idea. | |||
56 | FAddendCoef() = default; | |||
57 | ~FAddendCoef(); | |||
58 | ||||
59 | // If possible, don't define operator+/operator- etc because these | |||
60 | // operators inevitably call FAddendCoef's constructor which is not cheap. | |||
61 | void operator=(const FAddendCoef &A); | |||
62 | void operator+=(const FAddendCoef &A); | |||
63 | void operator*=(const FAddendCoef &S); | |||
64 | ||||
65 | void set(short C) { | |||
66 | assert(!insaneIntVal(C) && "Insane coefficient")(static_cast <bool> (!insaneIntVal(C) && "Insane coefficient" ) ? void (0) : __assert_fail ("!insaneIntVal(C) && \"Insane coefficient\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 66, __extension__ __PRETTY_FUNCTION__)); | |||
67 | IsFp = false; IntVal = C; | |||
68 | } | |||
69 | ||||
70 | void set(const APFloat& C); | |||
71 | ||||
72 | void negate(); | |||
73 | ||||
74 | bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } | |||
75 | Value *getValue(Type *) const; | |||
76 | ||||
77 | bool isOne() const { return isInt() && IntVal == 1; } | |||
78 | bool isTwo() const { return isInt() && IntVal == 2; } | |||
79 | bool isMinusOne() const { return isInt() && IntVal == -1; } | |||
80 | bool isMinusTwo() const { return isInt() && IntVal == -2; } | |||
81 | ||||
82 | private: | |||
83 | bool insaneIntVal(int V) { return V > 4 || V < -4; } | |||
84 | ||||
85 | APFloat *getFpValPtr() | |||
86 | { return reinterpret_cast<APFloat *>(&FpValBuf.buffer[0]); } | |||
87 | ||||
88 | const APFloat *getFpValPtr() const | |||
89 | { return reinterpret_cast<const APFloat *>(&FpValBuf.buffer[0]); } | |||
90 | ||||
91 | const APFloat &getFpVal() const { | |||
92 | assert(IsFp && BufHasFpVal && "Incorret state")(static_cast <bool> (IsFp && BufHasFpVal && "Incorret state") ? void (0) : __assert_fail ("IsFp && BufHasFpVal && \"Incorret state\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 92, __extension__ __PRETTY_FUNCTION__)); | |||
93 | return *getFpValPtr(); | |||
94 | } | |||
95 | ||||
96 | APFloat &getFpVal() { | |||
97 | assert(IsFp && BufHasFpVal && "Incorret state")(static_cast <bool> (IsFp && BufHasFpVal && "Incorret state") ? void (0) : __assert_fail ("IsFp && BufHasFpVal && \"Incorret state\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 97, __extension__ __PRETTY_FUNCTION__)); | |||
98 | return *getFpValPtr(); | |||
99 | } | |||
100 | ||||
101 | bool isInt() const { return !IsFp; } | |||
102 | ||||
103 | // If the coefficient is represented by an integer, promote it to a | |||
104 | // floating point. | |||
105 | void convertToFpType(const fltSemantics &Sem); | |||
106 | ||||
107 | // Construct an APFloat from a signed integer. | |||
108 | // TODO: We should get rid of this function when APFloat can be constructed | |||
109 | // from an *SIGNED* integer. | |||
110 | APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); | |||
111 | ||||
112 | bool IsFp = false; | |||
113 | ||||
114 | // True iff FpValBuf contains an instance of APFloat. | |||
115 | bool BufHasFpVal = false; | |||
116 | ||||
117 | // The integer coefficient of an individual addend is either 1 or -1, | |||
118 | // and we try to simplify at most 4 addends from neighboring at most | |||
119 | // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt | |||
120 | // is overkill of this end. | |||
121 | short IntVal = 0; | |||
122 | ||||
123 | AlignedCharArrayUnion<APFloat> FpValBuf; | |||
124 | }; | |||
125 | ||||
126 | /// FAddend is used to represent floating-point addend. An addend is | |||
127 | /// represented as <C, V>, where the V is a symbolic value, and C is a | |||
128 | /// constant coefficient. A constant addend is represented as <C, 0>. | |||
129 | class FAddend { | |||
130 | public: | |||
131 | FAddend() = default; | |||
132 | ||||
133 | void operator+=(const FAddend &T) { | |||
134 | assert((Val == T.Val) && "Symbolic-values disagree")(static_cast <bool> ((Val == T.Val) && "Symbolic-values disagree" ) ? void (0) : __assert_fail ("(Val == T.Val) && \"Symbolic-values disagree\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 134, __extension__ __PRETTY_FUNCTION__)); | |||
135 | Coeff += T.Coeff; | |||
136 | } | |||
137 | ||||
138 | Value *getSymVal() const { return Val; } | |||
139 | const FAddendCoef &getCoef() const { return Coeff; } | |||
140 | ||||
141 | bool isConstant() const { return Val == nullptr; } | |||
142 | bool isZero() const { return Coeff.isZero(); } | |||
143 | ||||
144 | void set(short Coefficient, Value *V) { | |||
145 | Coeff.set(Coefficient); | |||
146 | Val = V; | |||
147 | } | |||
148 | void set(const APFloat &Coefficient, Value *V) { | |||
149 | Coeff.set(Coefficient); | |||
150 | Val = V; | |||
151 | } | |||
152 | void set(const ConstantFP *Coefficient, Value *V) { | |||
153 | Coeff.set(Coefficient->getValueAPF()); | |||
154 | Val = V; | |||
155 | } | |||
156 | ||||
157 | void negate() { Coeff.negate(); } | |||
158 | ||||
159 | /// Drill down the U-D chain one step to find the definition of V, and | |||
160 | /// try to break the definition into one or two addends. | |||
161 | static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); | |||
162 | ||||
163 | /// Similar to FAddend::drillDownOneStep() except that the value being | |||
164 | /// splitted is the addend itself. | |||
165 | unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; | |||
166 | ||||
167 | private: | |||
168 | void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } | |||
169 | ||||
170 | // This addend has the value of "Coeff * Val". | |||
171 | Value *Val = nullptr; | |||
172 | FAddendCoef Coeff; | |||
173 | }; | |||
174 | ||||
175 | /// FAddCombine is the class for optimizing an unsafe fadd/fsub along | |||
176 | /// with its neighboring at most two instructions. | |||
177 | /// | |||
178 | class FAddCombine { | |||
179 | public: | |||
180 | FAddCombine(InstCombiner::BuilderTy &B) : Builder(B) {} | |||
181 | ||||
182 | Value *simplify(Instruction *FAdd); | |||
183 | ||||
184 | private: | |||
185 | using AddendVect = SmallVector<const FAddend *, 4>; | |||
186 | ||||
187 | Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); | |||
188 | ||||
189 | Value *performFactorization(Instruction *I); | |||
190 | ||||
191 | /// Convert given addend to a Value | |||
192 | Value *createAddendVal(const FAddend &A, bool& NeedNeg); | |||
193 | ||||
194 | /// Return the number of instructions needed to emit the N-ary addition. | |||
195 | unsigned calcInstrNumber(const AddendVect& Vect); | |||
196 | ||||
197 | Value *createFSub(Value *Opnd0, Value *Opnd1); | |||
198 | Value *createFAdd(Value *Opnd0, Value *Opnd1); | |||
199 | Value *createFMul(Value *Opnd0, Value *Opnd1); | |||
200 | Value *createFDiv(Value *Opnd0, Value *Opnd1); | |||
201 | Value *createFNeg(Value *V); | |||
202 | Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); | |||
203 | void createInstPostProc(Instruction *NewInst, bool NoNumber = false); | |||
204 | ||||
205 | // Debugging stuff are clustered here. | |||
206 | #ifndef NDEBUG | |||
207 | unsigned CreateInstrNum; | |||
208 | void initCreateInstNum() { CreateInstrNum = 0; } | |||
209 | void incCreateInstNum() { CreateInstrNum++; } | |||
| ||||
210 | #else | |||
211 | void initCreateInstNum() {} | |||
212 | void incCreateInstNum() {} | |||
213 | #endif | |||
214 | ||||
215 | InstCombiner::BuilderTy &Builder; | |||
216 | Instruction *Instr = nullptr; | |||
217 | }; | |||
218 | ||||
219 | } // end anonymous namespace | |||
220 | ||||
221 | //===----------------------------------------------------------------------===// | |||
222 | // | |||
223 | // Implementation of | |||
224 | // {FAddendCoef, FAddend, FAddition, FAddCombine}. | |||
225 | // | |||
226 | //===----------------------------------------------------------------------===// | |||
227 | FAddendCoef::~FAddendCoef() { | |||
228 | if (BufHasFpVal) | |||
229 | getFpValPtr()->~APFloat(); | |||
230 | } | |||
231 | ||||
232 | void FAddendCoef::set(const APFloat& C) { | |||
233 | APFloat *P = getFpValPtr(); | |||
234 | ||||
235 | if (isInt()) { | |||
236 | // As the buffer is meanless byte stream, we cannot call | |||
237 | // APFloat::operator=(). | |||
238 | new(P) APFloat(C); | |||
239 | } else | |||
240 | *P = C; | |||
241 | ||||
242 | IsFp = BufHasFpVal = true; | |||
243 | } | |||
244 | ||||
245 | void FAddendCoef::convertToFpType(const fltSemantics &Sem) { | |||
246 | if (!isInt()) | |||
247 | return; | |||
248 | ||||
249 | APFloat *P = getFpValPtr(); | |||
250 | if (IntVal > 0) | |||
251 | new(P) APFloat(Sem, IntVal); | |||
252 | else { | |||
253 | new(P) APFloat(Sem, 0 - IntVal); | |||
254 | P->changeSign(); | |||
255 | } | |||
256 | IsFp = BufHasFpVal = true; | |||
257 | } | |||
258 | ||||
259 | APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { | |||
260 | if (Val >= 0) | |||
261 | return APFloat(Sem, Val); | |||
262 | ||||
263 | APFloat T(Sem, 0 - Val); | |||
264 | T.changeSign(); | |||
265 | ||||
266 | return T; | |||
267 | } | |||
268 | ||||
269 | void FAddendCoef::operator=(const FAddendCoef &That) { | |||
270 | if (That.isInt()) | |||
271 | set(That.IntVal); | |||
272 | else | |||
273 | set(That.getFpVal()); | |||
274 | } | |||
275 | ||||
276 | void FAddendCoef::operator+=(const FAddendCoef &That) { | |||
277 | enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; | |||
278 | if (isInt() == That.isInt()) { | |||
279 | if (isInt()) | |||
280 | IntVal += That.IntVal; | |||
281 | else | |||
282 | getFpVal().add(That.getFpVal(), RndMode); | |||
283 | return; | |||
284 | } | |||
285 | ||||
286 | if (isInt()) { | |||
287 | const APFloat &T = That.getFpVal(); | |||
288 | convertToFpType(T.getSemantics()); | |||
289 | getFpVal().add(T, RndMode); | |||
290 | return; | |||
291 | } | |||
292 | ||||
293 | APFloat &T = getFpVal(); | |||
294 | T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); | |||
295 | } | |||
296 | ||||
297 | void FAddendCoef::operator*=(const FAddendCoef &That) { | |||
298 | if (That.isOne()) | |||
299 | return; | |||
300 | ||||
301 | if (That.isMinusOne()) { | |||
302 | negate(); | |||
303 | return; | |||
304 | } | |||
305 | ||||
306 | if (isInt() && That.isInt()) { | |||
307 | int Res = IntVal * (int)That.IntVal; | |||
308 | assert(!insaneIntVal(Res) && "Insane int value")(static_cast <bool> (!insaneIntVal(Res) && "Insane int value" ) ? void (0) : __assert_fail ("!insaneIntVal(Res) && \"Insane int value\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 308, __extension__ __PRETTY_FUNCTION__)); | |||
309 | IntVal = Res; | |||
310 | return; | |||
311 | } | |||
312 | ||||
313 | const fltSemantics &Semantic = | |||
314 | isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); | |||
315 | ||||
316 | if (isInt()) | |||
317 | convertToFpType(Semantic); | |||
318 | APFloat &F0 = getFpVal(); | |||
319 | ||||
320 | if (That.isInt()) | |||
321 | F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), | |||
322 | APFloat::rmNearestTiesToEven); | |||
323 | else | |||
324 | F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); | |||
325 | } | |||
326 | ||||
327 | void FAddendCoef::negate() { | |||
328 | if (isInt()) | |||
329 | IntVal = 0 - IntVal; | |||
330 | else | |||
331 | getFpVal().changeSign(); | |||
332 | } | |||
333 | ||||
334 | Value *FAddendCoef::getValue(Type *Ty) const { | |||
335 | return isInt() ? | |||
336 | ConstantFP::get(Ty, float(IntVal)) : | |||
337 | ConstantFP::get(Ty->getContext(), getFpVal()); | |||
338 | } | |||
339 | ||||
340 | // The definition of <Val> Addends | |||
341 | // ========================================= | |||
342 | // A + B <1, A>, <1,B> | |||
343 | // A - B <1, A>, <1,B> | |||
344 | // 0 - B <-1, B> | |||
345 | // C * A, <C, A> | |||
346 | // A + C <1, A> <C, NULL> | |||
347 | // 0 +/- 0 <0, NULL> (corner case) | |||
348 | // | |||
349 | // Legend: A and B are not constant, C is constant | |||
350 | unsigned FAddend::drillValueDownOneStep | |||
351 | (Value *Val, FAddend &Addend0, FAddend &Addend1) { | |||
352 | Instruction *I = nullptr; | |||
353 | if (!Val || !(I = dyn_cast<Instruction>(Val))) | |||
354 | return 0; | |||
355 | ||||
356 | unsigned Opcode = I->getOpcode(); | |||
357 | ||||
358 | if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) { | |||
359 | ConstantFP *C0, *C1; | |||
360 | Value *Opnd0 = I->getOperand(0); | |||
361 | Value *Opnd1 = I->getOperand(1); | |||
362 | if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero()) | |||
363 | Opnd0 = nullptr; | |||
364 | ||||
365 | if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero()) | |||
366 | Opnd1 = nullptr; | |||
367 | ||||
368 | if (Opnd0) { | |||
369 | if (!C0) | |||
370 | Addend0.set(1, Opnd0); | |||
371 | else | |||
372 | Addend0.set(C0, nullptr); | |||
373 | } | |||
374 | ||||
375 | if (Opnd1) { | |||
376 | FAddend &Addend = Opnd0 ? Addend1 : Addend0; | |||
377 | if (!C1) | |||
378 | Addend.set(1, Opnd1); | |||
379 | else | |||
380 | Addend.set(C1, nullptr); | |||
381 | if (Opcode == Instruction::FSub) | |||
382 | Addend.negate(); | |||
383 | } | |||
384 | ||||
385 | if (Opnd0 || Opnd1) | |||
386 | return Opnd0 && Opnd1 ? 2 : 1; | |||
387 | ||||
388 | // Both operands are zero. Weird! | |||
389 | Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr); | |||
390 | return 1; | |||
391 | } | |||
392 | ||||
393 | if (I->getOpcode() == Instruction::FMul) { | |||
394 | Value *V0 = I->getOperand(0); | |||
395 | Value *V1 = I->getOperand(1); | |||
396 | if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) { | |||
397 | Addend0.set(C, V1); | |||
398 | return 1; | |||
399 | } | |||
400 | ||||
401 | if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) { | |||
402 | Addend0.set(C, V0); | |||
403 | return 1; | |||
404 | } | |||
405 | } | |||
406 | ||||
407 | return 0; | |||
408 | } | |||
409 | ||||
410 | // Try to break *this* addend into two addends. e.g. Suppose this addend is | |||
411 | // <2.3, V>, and V = X + Y, by calling this function, we obtain two addends, | |||
412 | // i.e. <2.3, X> and <2.3, Y>. | |||
413 | unsigned FAddend::drillAddendDownOneStep | |||
414 | (FAddend &Addend0, FAddend &Addend1) const { | |||
415 | if (isConstant()) | |||
416 | return 0; | |||
417 | ||||
418 | unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); | |||
419 | if (!BreakNum || Coeff.isOne()) | |||
420 | return BreakNum; | |||
421 | ||||
422 | Addend0.Scale(Coeff); | |||
423 | ||||
424 | if (BreakNum == 2) | |||
425 | Addend1.Scale(Coeff); | |||
426 | ||||
427 | return BreakNum; | |||
428 | } | |||
429 | ||||
430 | // Try to perform following optimization on the input instruction I. Return the | |||
431 | // simplified expression if was successful; otherwise, return 0. | |||
432 | // | |||
433 | // Instruction "I" is Simplified into | |||
434 | // ------------------------------------------------------- | |||
435 | // (x * y) +/- (x * z) x * (y +/- z) | |||
436 | // (y / x) +/- (z / x) (y +/- z) / x | |||
437 | Value *FAddCombine::performFactorization(Instruction *I) { | |||
438 | assert((I->getOpcode() == Instruction::FAdd ||(static_cast <bool> ((I->getOpcode() == Instruction:: FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub" ) ? void (0) : __assert_fail ("(I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && \"Expect add/sub\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 439, __extension__ __PRETTY_FUNCTION__)) | |||
439 | I->getOpcode() == Instruction::FSub) && "Expect add/sub")(static_cast <bool> ((I->getOpcode() == Instruction:: FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub" ) ? void (0) : __assert_fail ("(I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && \"Expect add/sub\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 439, __extension__ __PRETTY_FUNCTION__)); | |||
440 | ||||
441 | Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); | |||
442 | Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); | |||
443 | ||||
444 | if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) | |||
445 | return nullptr; | |||
446 | ||||
447 | bool isMpy = false; | |||
448 | if (I0->getOpcode() == Instruction::FMul) | |||
449 | isMpy = true; | |||
450 | else if (I0->getOpcode() != Instruction::FDiv) | |||
451 | return nullptr; | |||
452 | ||||
453 | Value *Opnd0_0 = I0->getOperand(0); | |||
454 | Value *Opnd0_1 = I0->getOperand(1); | |||
455 | Value *Opnd1_0 = I1->getOperand(0); | |||
456 | Value *Opnd1_1 = I1->getOperand(1); | |||
457 | ||||
458 | // Input Instr I Factor AddSub0 AddSub1 | |||
459 | // ---------------------------------------------- | |||
460 | // (x*y) +/- (x*z) x y z | |||
461 | // (y/x) +/- (z/x) x y z | |||
462 | Value *Factor = nullptr; | |||
463 | Value *AddSub0 = nullptr, *AddSub1 = nullptr; | |||
464 | ||||
465 | if (isMpy) { | |||
466 | if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) | |||
467 | Factor = Opnd0_0; | |||
468 | else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) | |||
469 | Factor = Opnd0_1; | |||
470 | ||||
471 | if (Factor) { | |||
472 | AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; | |||
473 | AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; | |||
474 | } | |||
475 | } else if (Opnd0_1 == Opnd1_1) { | |||
476 | Factor = Opnd0_1; | |||
477 | AddSub0 = Opnd0_0; | |||
478 | AddSub1 = Opnd1_0; | |||
479 | } | |||
480 | ||||
481 | if (!Factor) | |||
482 | return nullptr; | |||
483 | ||||
484 | FastMathFlags Flags; | |||
485 | Flags.setFast(); | |||
486 | if (I0) Flags &= I->getFastMathFlags(); | |||
487 | if (I1) Flags &= I->getFastMathFlags(); | |||
488 | ||||
489 | // Create expression "NewAddSub = AddSub0 +/- AddsSub1" | |||
490 | Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? | |||
491 | createFAdd(AddSub0, AddSub1) : | |||
492 | createFSub(AddSub0, AddSub1); | |||
493 | if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { | |||
494 | const APFloat &F = CFP->getValueAPF(); | |||
495 | if (!F.isNormal()) | |||
496 | return nullptr; | |||
497 | } else if (Instruction *II = dyn_cast<Instruction>(NewAddSub)) | |||
498 | II->setFastMathFlags(Flags); | |||
499 | ||||
500 | if (isMpy) { | |||
501 | Value *RI = createFMul(Factor, NewAddSub); | |||
502 | if (Instruction *II = dyn_cast<Instruction>(RI)) | |||
503 | II->setFastMathFlags(Flags); | |||
504 | return RI; | |||
505 | } | |||
506 | ||||
507 | Value *RI = createFDiv(NewAddSub, Factor); | |||
508 | if (Instruction *II = dyn_cast<Instruction>(RI)) | |||
509 | II->setFastMathFlags(Flags); | |||
510 | return RI; | |||
511 | } | |||
512 | ||||
513 | Value *FAddCombine::simplify(Instruction *I) { | |||
514 | assert(I->hasAllowReassoc() && I->hasNoSignedZeros() &&(static_cast <bool> (I->hasAllowReassoc() && I->hasNoSignedZeros() && "Expected 'reassoc'+'nsz' instruction" ) ? void (0) : __assert_fail ("I->hasAllowReassoc() && I->hasNoSignedZeros() && \"Expected 'reassoc'+'nsz' instruction\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 515, __extension__ __PRETTY_FUNCTION__)) | |||
515 | "Expected 'reassoc'+'nsz' instruction")(static_cast <bool> (I->hasAllowReassoc() && I->hasNoSignedZeros() && "Expected 'reassoc'+'nsz' instruction" ) ? void (0) : __assert_fail ("I->hasAllowReassoc() && I->hasNoSignedZeros() && \"Expected 'reassoc'+'nsz' instruction\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 515, __extension__ __PRETTY_FUNCTION__)); | |||
516 | ||||
517 | // Currently we are not able to handle vector type. | |||
518 | if (I->getType()->isVectorTy()) | |||
519 | return nullptr; | |||
520 | ||||
521 | assert((I->getOpcode() == Instruction::FAdd ||(static_cast <bool> ((I->getOpcode() == Instruction:: FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub" ) ? void (0) : __assert_fail ("(I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && \"Expect add/sub\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 522, __extension__ __PRETTY_FUNCTION__)) | |||
522 | I->getOpcode() == Instruction::FSub) && "Expect add/sub")(static_cast <bool> ((I->getOpcode() == Instruction:: FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub" ) ? void (0) : __assert_fail ("(I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && \"Expect add/sub\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 522, __extension__ __PRETTY_FUNCTION__)); | |||
523 | ||||
524 | // Save the instruction before calling other member-functions. | |||
525 | Instr = I; | |||
526 | ||||
527 | FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; | |||
528 | ||||
529 | unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1); | |||
530 | ||||
531 | // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1. | |||
532 | unsigned Opnd0_ExpNum = 0; | |||
533 | unsigned Opnd1_ExpNum = 0; | |||
534 | ||||
535 | if (!Opnd0.isConstant()) | |||
536 | Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); | |||
537 | ||||
538 | // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. | |||
539 | if (OpndNum == 2 && !Opnd1.isConstant()) | |||
540 | Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1); | |||
541 | ||||
542 | // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1 | |||
543 | if (Opnd0_ExpNum && Opnd1_ExpNum) { | |||
544 | AddendVect AllOpnds; | |||
545 | AllOpnds.push_back(&Opnd0_0); | |||
546 | AllOpnds.push_back(&Opnd1_0); | |||
547 | if (Opnd0_ExpNum == 2) | |||
548 | AllOpnds.push_back(&Opnd0_1); | |||
549 | if (Opnd1_ExpNum == 2) | |||
550 | AllOpnds.push_back(&Opnd1_1); | |||
551 | ||||
552 | // Compute instruction quota. We should save at least one instruction. | |||
553 | unsigned InstQuota = 0; | |||
554 | ||||
555 | Value *V0 = I->getOperand(0); | |||
556 | Value *V1 = I->getOperand(1); | |||
557 | InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && | |||
558 | (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; | |||
559 | ||||
560 | if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) | |||
561 | return R; | |||
562 | } | |||
563 | ||||
564 | if (OpndNum != 2) { | |||
565 | // The input instruction is : "I=0.0 +/- V". If the "V" were able to be | |||
566 | // splitted into two addends, say "V = X - Y", the instruction would have | |||
567 | // been optimized into "I = Y - X" in the previous steps. | |||
568 | // | |||
569 | const FAddendCoef &CE = Opnd0.getCoef(); | |||
570 | return CE.isOne() ? Opnd0.getSymVal() : nullptr; | |||
571 | } | |||
572 | ||||
573 | // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] | |||
574 | if (Opnd1_ExpNum) { | |||
575 | AddendVect AllOpnds; | |||
576 | AllOpnds.push_back(&Opnd0); | |||
577 | AllOpnds.push_back(&Opnd1_0); | |||
578 | if (Opnd1_ExpNum == 2) | |||
579 | AllOpnds.push_back(&Opnd1_1); | |||
580 | ||||
581 | if (Value *R = simplifyFAdd(AllOpnds, 1)) | |||
582 | return R; | |||
583 | } | |||
584 | ||||
585 | // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1] | |||
586 | if (Opnd0_ExpNum) { | |||
587 | AddendVect AllOpnds; | |||
588 | AllOpnds.push_back(&Opnd1); | |||
589 | AllOpnds.push_back(&Opnd0_0); | |||
590 | if (Opnd0_ExpNum == 2) | |||
591 | AllOpnds.push_back(&Opnd0_1); | |||
592 | ||||
593 | if (Value *R = simplifyFAdd(AllOpnds, 1)) | |||
594 | return R; | |||
595 | } | |||
596 | ||||
597 | // step 6: Try factorization as the last resort, | |||
598 | return performFactorization(I); | |||
599 | } | |||
600 | ||||
601 | Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { | |||
602 | unsigned AddendNum = Addends.size(); | |||
603 | assert(AddendNum <= 4 && "Too many addends")(static_cast <bool> (AddendNum <= 4 && "Too many addends" ) ? void (0) : __assert_fail ("AddendNum <= 4 && \"Too many addends\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 603, __extension__ __PRETTY_FUNCTION__)); | |||
604 | ||||
605 | // For saving intermediate results; | |||
606 | unsigned NextTmpIdx = 0; | |||
607 | FAddend TmpResult[3]; | |||
608 | ||||
609 | // Points to the constant addend of the resulting simplified expression. | |||
610 | // If the resulting expr has constant-addend, this constant-addend is | |||
611 | // desirable to reside at the top of the resulting expression tree. Placing | |||
612 | // constant close to supper-expr(s) will potentially reveal some optimization | |||
613 | // opportunities in super-expr(s). | |||
614 | const FAddend *ConstAdd = nullptr; | |||
615 | ||||
616 | // Simplified addends are placed <SimpVect>. | |||
617 | AddendVect SimpVect; | |||
618 | ||||
619 | // The outer loop works on one symbolic-value at a time. Suppose the input | |||
620 | // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... | |||
621 | // The symbolic-values will be processed in this order: x, y, z. | |||
622 | for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { | |||
623 | ||||
624 | const FAddend *ThisAddend = Addends[SymIdx]; | |||
625 | if (!ThisAddend) { | |||
626 | // This addend was processed before. | |||
627 | continue; | |||
628 | } | |||
629 | ||||
630 | Value *Val = ThisAddend->getSymVal(); | |||
631 | unsigned StartIdx = SimpVect.size(); | |||
632 | SimpVect.push_back(ThisAddend); | |||
633 | ||||
634 | // The inner loop collects addends sharing same symbolic-value, and these | |||
635 | // addends will be later on folded into a single addend. Following above | |||
636 | // example, if the symbolic value "y" is being processed, the inner loop | |||
637 | // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will | |||
638 | // be later on folded into "<b1+b2, y>". | |||
639 | for (unsigned SameSymIdx = SymIdx + 1; | |||
640 | SameSymIdx < AddendNum; SameSymIdx++) { | |||
641 | const FAddend *T = Addends[SameSymIdx]; | |||
642 | if (T && T->getSymVal() == Val) { | |||
643 | // Set null such that next iteration of the outer loop will not process | |||
644 | // this addend again. | |||
645 | Addends[SameSymIdx] = nullptr; | |||
646 | SimpVect.push_back(T); | |||
647 | } | |||
648 | } | |||
649 | ||||
650 | // If multiple addends share same symbolic value, fold them together. | |||
651 | if (StartIdx + 1 != SimpVect.size()) { | |||
652 | FAddend &R = TmpResult[NextTmpIdx ++]; | |||
653 | R = *SimpVect[StartIdx]; | |||
654 | for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++) | |||
655 | R += *SimpVect[Idx]; | |||
656 | ||||
657 | // Pop all addends being folded and push the resulting folded addend. | |||
658 | SimpVect.resize(StartIdx); | |||
659 | if (Val) { | |||
660 | if (!R.isZero()) { | |||
661 | SimpVect.push_back(&R); | |||
662 | } | |||
663 | } else { | |||
664 | // Don't push constant addend at this time. It will be the last element | |||
665 | // of <SimpVect>. | |||
666 | ConstAdd = &R; | |||
667 | } | |||
668 | } | |||
669 | } | |||
670 | ||||
671 | assert((NextTmpIdx <= array_lengthof(TmpResult) + 1) &&(static_cast <bool> ((NextTmpIdx <= array_lengthof(TmpResult ) + 1) && "out-of-bound access") ? void (0) : __assert_fail ("(NextTmpIdx <= array_lengthof(TmpResult) + 1) && \"out-of-bound access\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 672, __extension__ __PRETTY_FUNCTION__)) | |||
672 | "out-of-bound access")(static_cast <bool> ((NextTmpIdx <= array_lengthof(TmpResult ) + 1) && "out-of-bound access") ? void (0) : __assert_fail ("(NextTmpIdx <= array_lengthof(TmpResult) + 1) && \"out-of-bound access\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 672, __extension__ __PRETTY_FUNCTION__)); | |||
673 | ||||
674 | if (ConstAdd) | |||
675 | SimpVect.push_back(ConstAdd); | |||
676 | ||||
677 | Value *Result; | |||
678 | if (!SimpVect.empty()) | |||
679 | Result = createNaryFAdd(SimpVect, InstrQuota); | |||
680 | else { | |||
681 | // The addition is folded to 0.0. | |||
682 | Result = ConstantFP::get(Instr->getType(), 0.0); | |||
683 | } | |||
684 | ||||
685 | return Result; | |||
686 | } | |||
687 | ||||
688 | Value *FAddCombine::createNaryFAdd | |||
689 | (const AddendVect &Opnds, unsigned InstrQuota) { | |||
690 | assert(!Opnds.empty() && "Expect at least one addend")(static_cast <bool> (!Opnds.empty() && "Expect at least one addend" ) ? void (0) : __assert_fail ("!Opnds.empty() && \"Expect at least one addend\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 690, __extension__ __PRETTY_FUNCTION__)); | |||
691 | ||||
692 | // Step 1: Check if the # of instructions needed exceeds the quota. | |||
693 | ||||
694 | unsigned InstrNeeded = calcInstrNumber(Opnds); | |||
695 | if (InstrNeeded > InstrQuota) | |||
696 | return nullptr; | |||
697 | ||||
698 | initCreateInstNum(); | |||
699 | ||||
700 | // step 2: Emit the N-ary addition. | |||
701 | // Note that at most three instructions are involved in Fadd-InstCombine: the | |||
702 | // addition in question, and at most two neighboring instructions. | |||
703 | // The resulting optimized addition should have at least one less instruction | |||
704 | // than the original addition expression tree. This implies that the resulting | |||
705 | // N-ary addition has at most two instructions, and we don't need to worry | |||
706 | // about tree-height when constructing the N-ary addition. | |||
707 | ||||
708 | Value *LastVal = nullptr; | |||
709 | bool LastValNeedNeg = false; | |||
710 | ||||
711 | // Iterate the addends, creating fadd/fsub using adjacent two addends. | |||
712 | for (const FAddend *Opnd : Opnds) { | |||
713 | bool NeedNeg; | |||
714 | Value *V = createAddendVal(*Opnd, NeedNeg); | |||
715 | if (!LastVal) { | |||
716 | LastVal = V; | |||
717 | LastValNeedNeg = NeedNeg; | |||
718 | continue; | |||
719 | } | |||
720 | ||||
721 | if (LastValNeedNeg == NeedNeg) { | |||
722 | LastVal = createFAdd(LastVal, V); | |||
723 | continue; | |||
724 | } | |||
725 | ||||
726 | if (LastValNeedNeg) | |||
727 | LastVal = createFSub(V, LastVal); | |||
728 | else | |||
729 | LastVal = createFSub(LastVal, V); | |||
730 | ||||
731 | LastValNeedNeg = false; | |||
732 | } | |||
733 | ||||
734 | if (LastValNeedNeg) { | |||
735 | LastVal = createFNeg(LastVal); | |||
736 | } | |||
737 | ||||
738 | #ifndef NDEBUG | |||
739 | assert(CreateInstrNum == InstrNeeded &&(static_cast <bool> (CreateInstrNum == InstrNeeded && "Inconsistent in instruction numbers") ? void (0) : __assert_fail ("CreateInstrNum == InstrNeeded && \"Inconsistent in instruction numbers\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 740, __extension__ __PRETTY_FUNCTION__)) | |||
740 | "Inconsistent in instruction numbers")(static_cast <bool> (CreateInstrNum == InstrNeeded && "Inconsistent in instruction numbers") ? void (0) : __assert_fail ("CreateInstrNum == InstrNeeded && \"Inconsistent in instruction numbers\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 740, __extension__ __PRETTY_FUNCTION__)); | |||
741 | #endif | |||
742 | ||||
743 | return LastVal; | |||
744 | } | |||
745 | ||||
746 | Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) { | |||
747 | Value *V = Builder.CreateFSub(Opnd0, Opnd1); | |||
748 | if (Instruction *I = dyn_cast<Instruction>(V)) | |||
749 | createInstPostProc(I); | |||
750 | return V; | |||
751 | } | |||
752 | ||||
753 | Value *FAddCombine::createFNeg(Value *V) { | |||
754 | Value *Zero = cast<Value>(ConstantFP::getZeroValueForNegation(V->getType())); | |||
755 | Value *NewV = createFSub(Zero, V); | |||
756 | if (Instruction *I = dyn_cast<Instruction>(NewV)) | |||
757 | createInstPostProc(I, true); // fneg's don't receive instruction numbers. | |||
758 | return NewV; | |||
759 | } | |||
760 | ||||
761 | Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) { | |||
762 | Value *V = Builder.CreateFAdd(Opnd0, Opnd1); | |||
763 | if (Instruction *I = dyn_cast<Instruction>(V)) | |||
764 | createInstPostProc(I); | |||
765 | return V; | |||
766 | } | |||
767 | ||||
768 | Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { | |||
769 | Value *V = Builder.CreateFMul(Opnd0, Opnd1); | |||
770 | if (Instruction *I = dyn_cast<Instruction>(V)) | |||
771 | createInstPostProc(I); | |||
772 | return V; | |||
773 | } | |||
774 | ||||
775 | Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { | |||
776 | Value *V = Builder.CreateFDiv(Opnd0, Opnd1); | |||
777 | if (Instruction *I = dyn_cast<Instruction>(V)) | |||
778 | createInstPostProc(I); | |||
779 | return V; | |||
780 | } | |||
781 | ||||
782 | void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) { | |||
783 | NewInstr->setDebugLoc(Instr->getDebugLoc()); | |||
784 | ||||
785 | // Keep track of the number of instruction created. | |||
786 | if (!NoNumber) | |||
787 | incCreateInstNum(); | |||
788 | ||||
789 | // Propagate fast-math flags | |||
790 | NewInstr->setFastMathFlags(Instr->getFastMathFlags()); | |||
791 | } | |||
792 | ||||
793 | // Return the number of instruction needed to emit the N-ary addition. | |||
794 | // NOTE: Keep this function in sync with createAddendVal(). | |||
795 | unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { | |||
796 | unsigned OpndNum = Opnds.size(); | |||
797 | unsigned InstrNeeded = OpndNum - 1; | |||
798 | ||||
799 | // The number of addends in the form of "(-1)*x". | |||
800 | unsigned NegOpndNum = 0; | |||
801 | ||||
802 | // Adjust the number of instructions needed to emit the N-ary add. | |||
803 | for (const FAddend *Opnd : Opnds) { | |||
804 | if (Opnd->isConstant()) | |||
805 | continue; | |||
806 | ||||
807 | // The constant check above is really for a few special constant | |||
808 | // coefficients. | |||
809 | if (isa<UndefValue>(Opnd->getSymVal())) | |||
810 | continue; | |||
811 | ||||
812 | const FAddendCoef &CE = Opnd->getCoef(); | |||
813 | if (CE.isMinusOne() || CE.isMinusTwo()) | |||
814 | NegOpndNum++; | |||
815 | ||||
816 | // Let the addend be "c * x". If "c == +/-1", the value of the addend | |||
817 | // is immediately available; otherwise, it needs exactly one instruction | |||
818 | // to evaluate the value. | |||
819 | if (!CE.isMinusOne() && !CE.isOne()) | |||
820 | InstrNeeded++; | |||
821 | } | |||
822 | if (NegOpndNum == OpndNum) | |||
823 | InstrNeeded++; | |||
824 | return InstrNeeded; | |||
825 | } | |||
826 | ||||
827 | // Input Addend Value NeedNeg(output) | |||
828 | // ================================================================ | |||
829 | // Constant C C false | |||
830 | // <+/-1, V> V coefficient is -1 | |||
831 | // <2/-2, V> "fadd V, V" coefficient is -2 | |||
832 | // <C, V> "fmul V, C" false | |||
833 | // | |||
834 | // NOTE: Keep this function in sync with FAddCombine::calcInstrNumber. | |||
835 | Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) { | |||
836 | const FAddendCoef &Coeff = Opnd.getCoef(); | |||
837 | ||||
838 | if (Opnd.isConstant()) { | |||
839 | NeedNeg = false; | |||
840 | return Coeff.getValue(Instr->getType()); | |||
841 | } | |||
842 | ||||
843 | Value *OpndVal = Opnd.getSymVal(); | |||
844 | ||||
845 | if (Coeff.isMinusOne() || Coeff.isOne()) { | |||
846 | NeedNeg = Coeff.isMinusOne(); | |||
847 | return OpndVal; | |||
848 | } | |||
849 | ||||
850 | if (Coeff.isTwo() || Coeff.isMinusTwo()) { | |||
851 | NeedNeg = Coeff.isMinusTwo(); | |||
852 | return createFAdd(OpndVal, OpndVal); | |||
853 | } | |||
854 | ||||
855 | NeedNeg = false; | |||
856 | return createFMul(OpndVal, Coeff.getValue(Instr->getType())); | |||
857 | } | |||
858 | ||||
859 | // Checks if any operand is negative and we can convert add to sub. | |||
860 | // This function checks for following negative patterns | |||
861 | // ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C)) | |||
862 | // ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C)) | |||
863 | // XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even | |||
864 | static Value *checkForNegativeOperand(BinaryOperator &I, | |||
865 | InstCombiner::BuilderTy &Builder) { | |||
866 | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); | |||
867 | ||||
868 | // This function creates 2 instructions to replace ADD, we need at least one | |||
869 | // of LHS or RHS to have one use to ensure benefit in transform. | |||
870 | if (!LHS->hasOneUse() && !RHS->hasOneUse()) | |||
871 | return nullptr; | |||
872 | ||||
873 | Value *X = nullptr, *Y = nullptr, *Z = nullptr; | |||
874 | const APInt *C1 = nullptr, *C2 = nullptr; | |||
875 | ||||
876 | // if ONE is on other side, swap | |||
877 | if (match(RHS, m_Add(m_Value(X), m_One()))) | |||
878 | std::swap(LHS, RHS); | |||
879 | ||||
880 | if (match(LHS, m_Add(m_Value(X), m_One()))) { | |||
881 | // if XOR on other side, swap | |||
882 | if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) | |||
883 | std::swap(X, RHS); | |||
884 | ||||
885 | if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) { | |||
886 | // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) | |||
887 | // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) | |||
888 | if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) { | |||
889 | Value *NewAnd = Builder.CreateAnd(Z, *C1); | |||
890 | return Builder.CreateSub(RHS, NewAnd, "sub"); | |||
891 | } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) { | |||
892 | // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1)) | |||
893 | // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1)) | |||
894 | Value *NewOr = Builder.CreateOr(Z, ~(*C1)); | |||
895 | return Builder.CreateSub(RHS, NewOr, "sub"); | |||
896 | } | |||
897 | } | |||
898 | } | |||
899 | ||||
900 | // Restore LHS and RHS | |||
901 | LHS = I.getOperand(0); | |||
902 | RHS = I.getOperand(1); | |||
903 | ||||
904 | // if XOR is on other side, swap | |||
905 | if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) | |||
906 | std::swap(LHS, RHS); | |||
907 | ||||
908 | // C2 is ODD | |||
909 | // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2)) | |||
910 | // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2)) | |||
911 | if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1)))) | |||
912 | if (C1->countTrailingZeros() == 0) | |||
913 | if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) { | |||
914 | Value *NewOr = Builder.CreateOr(Z, ~(*C2)); | |||
915 | return Builder.CreateSub(RHS, NewOr, "sub"); | |||
916 | } | |||
917 | return nullptr; | |||
918 | } | |||
919 | ||||
920 | Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) { | |||
921 | Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1); | |||
922 | Constant *Op1C; | |||
923 | if (!match(Op1, m_Constant(Op1C))) | |||
924 | return nullptr; | |||
925 | ||||
926 | if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add)) | |||
927 | return NV; | |||
928 | ||||
929 | Value *X, *Y; | |||
930 | ||||
931 | // add (sub X, Y), -1 --> add (not Y), X | |||
932 | if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) && | |||
933 | match(Op1, m_AllOnes())) | |||
934 | return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X); | |||
935 | ||||
936 | // zext(bool) + C -> bool ? C + 1 : C | |||
937 | if (match(Op0, m_ZExt(m_Value(X))) && | |||
938 | X->getType()->getScalarSizeInBits() == 1) | |||
939 | return SelectInst::Create(X, AddOne(Op1C), Op1); | |||
940 | ||||
941 | // ~X + C --> (C-1) - X | |||
942 | if (match(Op0, m_Not(m_Value(X)))) | |||
943 | return BinaryOperator::CreateSub(SubOne(Op1C), X); | |||
944 | ||||
945 | const APInt *C; | |||
946 | if (!match(Op1, m_APInt(C))) | |||
947 | return nullptr; | |||
948 | ||||
949 | if (C->isSignMask()) { | |||
950 | // If wrapping is not allowed, then the addition must set the sign bit: | |||
951 | // X + (signmask) --> X | signmask | |||
952 | if (Add.hasNoSignedWrap() || Add.hasNoUnsignedWrap()) | |||
953 | return BinaryOperator::CreateOr(Op0, Op1); | |||
954 | ||||
955 | // If wrapping is allowed, then the addition flips the sign bit of LHS: | |||
956 | // X + (signmask) --> X ^ signmask | |||
957 | return BinaryOperator::CreateXor(Op0, Op1); | |||
958 | } | |||
959 | ||||
960 | // Is this add the last step in a convoluted sext? | |||
961 | // add(zext(xor i16 X, -32768), -32768) --> sext X | |||
962 | Type *Ty = Add.getType(); | |||
963 | const APInt *C2; | |||
964 | if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) && | |||
965 | C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C) | |||
966 | return CastInst::Create(Instruction::SExt, X, Ty); | |||
967 | ||||
968 | // (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C)) | |||
969 | if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2))))) && | |||
970 | C->isNegative() && C->sge(-C2->sext(C->getBitWidth()))) { | |||
971 | Constant *NewC = | |||
972 | ConstantInt::get(X->getType(), *C2 + C->trunc(C2->getBitWidth())); | |||
973 | return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty); | |||
974 | } | |||
975 | ||||
976 | if (C->isOneValue() && Op0->hasOneUse()) { | |||
977 | // add (sext i1 X), 1 --> zext (not X) | |||
978 | // TODO: The smallest IR representation is (select X, 0, 1), and that would | |||
979 | // not require the one-use check. But we need to remove a transform in | |||
980 | // visitSelect and make sure that IR value tracking for select is equal or | |||
981 | // better than for these ops. | |||
982 | if (match(Op0, m_SExt(m_Value(X))) && | |||
983 | X->getType()->getScalarSizeInBits() == 1) | |||
984 | return new ZExtInst(Builder.CreateNot(X), Ty); | |||
985 | ||||
986 | // Shifts and add used to flip and mask off the low bit: | |||
987 | // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1 | |||
988 | const APInt *C3; | |||
989 | if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) && | |||
990 | C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) { | |||
991 | Value *NotX = Builder.CreateNot(X); | |||
992 | return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1)); | |||
993 | } | |||
994 | } | |||
995 | ||||
996 | return nullptr; | |||
997 | } | |||
998 | ||||
999 | // Matches multiplication expression Op * C where C is a constant. Returns the | |||
1000 | // constant value in C and the other operand in Op. Returns true if such a | |||
1001 | // match is found. | |||
1002 | static bool MatchMul(Value *E, Value *&Op, APInt &C) { | |||
1003 | const APInt *AI; | |||
1004 | if (match(E, m_Mul(m_Value(Op), m_APInt(AI)))) { | |||
1005 | C = *AI; | |||
1006 | return true; | |||
1007 | } | |||
1008 | if (match(E, m_Shl(m_Value(Op), m_APInt(AI)))) { | |||
1009 | C = APInt(AI->getBitWidth(), 1); | |||
1010 | C <<= *AI; | |||
1011 | return true; | |||
1012 | } | |||
1013 | return false; | |||
1014 | } | |||
1015 | ||||
1016 | // Matches remainder expression Op % C where C is a constant. Returns the | |||
1017 | // constant value in C and the other operand in Op. Returns the signedness of | |||
1018 | // the remainder operation in IsSigned. Returns true if such a match is | |||
1019 | // found. | |||
1020 | static bool MatchRem(Value *E, Value *&Op, APInt &C, bool &IsSigned) { | |||
1021 | const APInt *AI; | |||
1022 | IsSigned = false; | |||
1023 | if (match(E, m_SRem(m_Value(Op), m_APInt(AI)))) { | |||
1024 | IsSigned = true; | |||
1025 | C = *AI; | |||
1026 | return true; | |||
1027 | } | |||
1028 | if (match(E, m_URem(m_Value(Op), m_APInt(AI)))) { | |||
1029 | C = *AI; | |||
1030 | return true; | |||
1031 | } | |||
1032 | if (match(E, m_And(m_Value(Op), m_APInt(AI))) && (*AI + 1).isPowerOf2()) { | |||
1033 | C = *AI + 1; | |||
1034 | return true; | |||
1035 | } | |||
1036 | return false; | |||
1037 | } | |||
1038 | ||||
1039 | // Matches division expression Op / C with the given signedness as indicated | |||
1040 | // by IsSigned, where C is a constant. Returns the constant value in C and the | |||
1041 | // other operand in Op. Returns true if such a match is found. | |||
1042 | static bool MatchDiv(Value *E, Value *&Op, APInt &C, bool IsSigned) { | |||
1043 | const APInt *AI; | |||
1044 | if (IsSigned && match(E, m_SDiv(m_Value(Op), m_APInt(AI)))) { | |||
1045 | C = *AI; | |||
1046 | return true; | |||
1047 | } | |||
1048 | if (!IsSigned) { | |||
1049 | if (match(E, m_UDiv(m_Value(Op), m_APInt(AI)))) { | |||
1050 | C = *AI; | |||
1051 | return true; | |||
1052 | } | |||
1053 | if (match(E, m_LShr(m_Value(Op), m_APInt(AI)))) { | |||
1054 | C = APInt(AI->getBitWidth(), 1); | |||
1055 | C <<= *AI; | |||
1056 | return true; | |||
1057 | } | |||
1058 | } | |||
1059 | return false; | |||
1060 | } | |||
1061 | ||||
1062 | // Returns whether C0 * C1 with the given signedness overflows. | |||
1063 | static bool MulWillOverflow(APInt &C0, APInt &C1, bool IsSigned) { | |||
1064 | bool overflow; | |||
1065 | if (IsSigned) | |||
1066 | (void)C0.smul_ov(C1, overflow); | |||
1067 | else | |||
1068 | (void)C0.umul_ov(C1, overflow); | |||
1069 | return overflow; | |||
1070 | } | |||
1071 | ||||
1072 | // Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1) | |||
1073 | // does not overflow. | |||
1074 | Value *InstCombiner::SimplifyAddWithRemainder(BinaryOperator &I) { | |||
1075 | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); | |||
1076 | Value *X, *MulOpV; | |||
1077 | APInt C0, MulOpC; | |||
1078 | bool IsSigned; | |||
1079 | // Match I = X % C0 + MulOpV * C0 | |||
1080 | if (((MatchRem(LHS, X, C0, IsSigned) && MatchMul(RHS, MulOpV, MulOpC)) || | |||
1081 | (MatchRem(RHS, X, C0, IsSigned) && MatchMul(LHS, MulOpV, MulOpC))) && | |||
1082 | C0 == MulOpC) { | |||
1083 | Value *RemOpV; | |||
1084 | APInt C1; | |||
1085 | bool Rem2IsSigned; | |||
1086 | // Match MulOpC = RemOpV % C1 | |||
1087 | if (MatchRem(MulOpV, RemOpV, C1, Rem2IsSigned) && | |||
1088 | IsSigned == Rem2IsSigned) { | |||
1089 | Value *DivOpV; | |||
1090 | APInt DivOpC; | |||
1091 | // Match RemOpV = X / C0 | |||
1092 | if (MatchDiv(RemOpV, DivOpV, DivOpC, IsSigned) && X == DivOpV && | |||
1093 | C0 == DivOpC && !MulWillOverflow(C0, C1, IsSigned)) { | |||
1094 | Value *NewDivisor = | |||
1095 | ConstantInt::get(X->getType()->getContext(), C0 * C1); | |||
1096 | return IsSigned ? Builder.CreateSRem(X, NewDivisor, "srem") | |||
1097 | : Builder.CreateURem(X, NewDivisor, "urem"); | |||
1098 | } | |||
1099 | } | |||
1100 | } | |||
1101 | ||||
1102 | return nullptr; | |||
1103 | } | |||
1104 | ||||
1105 | /// Fold | |||
1106 | /// (1 << NBits) - 1 | |||
1107 | /// Into: | |||
1108 | /// ~(-(1 << NBits)) | |||
1109 | /// Because a 'not' is better for bit-tracking analysis and other transforms | |||
1110 | /// than an 'add'. The new shl is always nsw, and is nuw if old `and` was. | |||
1111 | static Instruction *canonicalizeLowbitMask(BinaryOperator &I, | |||
1112 | InstCombiner::BuilderTy &Builder) { | |||
1113 | Value *NBits; | |||
1114 | if (!match(&I, m_Add(m_OneUse(m_Shl(m_One(), m_Value(NBits))), m_AllOnes()))) | |||
1115 | return nullptr; | |||
1116 | ||||
1117 | Constant *MinusOne = Constant::getAllOnesValue(NBits->getType()); | |||
1118 | Value *NotMask = Builder.CreateShl(MinusOne, NBits, "notmask"); | |||
1119 | // Be wary of constant folding. | |||
1120 | if (auto *BOp = dyn_cast<BinaryOperator>(NotMask)) { | |||
1121 | // Always NSW. But NUW propagates from `add`. | |||
1122 | BOp->setHasNoSignedWrap(); | |||
1123 | BOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); | |||
1124 | } | |||
1125 | ||||
1126 | return BinaryOperator::CreateNot(NotMask, I.getName()); | |||
1127 | } | |||
1128 | ||||
1129 | Instruction *InstCombiner::visitAdd(BinaryOperator &I) { | |||
1130 | if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1), | |||
1131 | I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), | |||
1132 | SQ.getWithInstruction(&I))) | |||
1133 | return replaceInstUsesWith(I, V); | |||
1134 | ||||
1135 | if (SimplifyAssociativeOrCommutative(I)) | |||
1136 | return &I; | |||
1137 | ||||
1138 | if (Instruction *X = foldShuffledBinop(I)) | |||
1139 | return X; | |||
1140 | ||||
1141 | // (A*B)+(A*C) -> A*(B+C) etc | |||
1142 | if (Value *V = SimplifyUsingDistributiveLaws(I)) | |||
1143 | return replaceInstUsesWith(I, V); | |||
1144 | ||||
1145 | if (Instruction *X = foldAddWithConstant(I)) | |||
1146 | return X; | |||
1147 | ||||
1148 | // FIXME: This should be moved into the above helper function to allow these | |||
1149 | // transforms for general constant or constant splat vectors. | |||
1150 | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); | |||
1151 | Type *Ty = I.getType(); | |||
1152 | if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { | |||
1153 | Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr; | |||
1154 | if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { | |||
1155 | unsigned TySizeBits = Ty->getScalarSizeInBits(); | |||
1156 | const APInt &RHSVal = CI->getValue(); | |||
1157 | unsigned ExtendAmt = 0; | |||
1158 | // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. | |||
1159 | // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. | |||
1160 | if (XorRHS->getValue() == -RHSVal) { | |||
1161 | if (RHSVal.isPowerOf2()) | |||
1162 | ExtendAmt = TySizeBits - RHSVal.logBase2() - 1; | |||
1163 | else if (XorRHS->getValue().isPowerOf2()) | |||
1164 | ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; | |||
1165 | } | |||
1166 | ||||
1167 | if (ExtendAmt) { | |||
1168 | APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); | |||
1169 | if (!MaskedValueIsZero(XorLHS, Mask, 0, &I)) | |||
1170 | ExtendAmt = 0; | |||
1171 | } | |||
1172 | ||||
1173 | if (ExtendAmt) { | |||
1174 | Constant *ShAmt = ConstantInt::get(Ty, ExtendAmt); | |||
1175 | Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext"); | |||
1176 | return BinaryOperator::CreateAShr(NewShl, ShAmt); | |||
1177 | } | |||
1178 | ||||
1179 | // If this is a xor that was canonicalized from a sub, turn it back into | |||
1180 | // a sub and fuse this add with it. | |||
1181 | if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { | |||
1182 | KnownBits LHSKnown = computeKnownBits(XorLHS, 0, &I); | |||
1183 | if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue()) | |||
1184 | return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), | |||
1185 | XorLHS); | |||
1186 | } | |||
1187 | // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C, | |||
1188 | // transform them into (X + (signmask ^ C)) | |||
1189 | if (XorRHS->getValue().isSignMask()) | |||
1190 | return BinaryOperator::CreateAdd(XorLHS, | |||
1191 | ConstantExpr::getXor(XorRHS, CI)); | |||
1192 | } | |||
1193 | } | |||
1194 | ||||
1195 | if (Ty->isIntOrIntVectorTy(1)) | |||
1196 | return BinaryOperator::CreateXor(LHS, RHS); | |||
1197 | ||||
1198 | // X + X --> X << 1 | |||
1199 | if (LHS == RHS) { | |||
1200 | auto *Shl = BinaryOperator::CreateShl(LHS, ConstantInt::get(Ty, 1)); | |||
1201 | Shl->setHasNoSignedWrap(I.hasNoSignedWrap()); | |||
1202 | Shl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); | |||
1203 | return Shl; | |||
1204 | } | |||
1205 | ||||
1206 | Value *A, *B; | |||
1207 | if (match(LHS, m_Neg(m_Value(A)))) { | |||
1208 | // -A + -B --> -(A + B) | |||
1209 | if (match(RHS, m_Neg(m_Value(B)))) | |||
1210 | return BinaryOperator::CreateNeg(Builder.CreateAdd(A, B)); | |||
1211 | ||||
1212 | // -A + B --> B - A | |||
1213 | return BinaryOperator::CreateSub(RHS, A); | |||
1214 | } | |||
1215 | ||||
1216 | // A + -B --> A - B | |||
1217 | if (match(RHS, m_Neg(m_Value(B)))) | |||
1218 | return BinaryOperator::CreateSub(LHS, B); | |||
1219 | ||||
1220 | if (Value *V = checkForNegativeOperand(I, Builder)) | |||
1221 | return replaceInstUsesWith(I, V); | |||
1222 | ||||
1223 | // (A + 1) + ~B --> A - B | |||
1224 | // ~B + (A + 1) --> A - B | |||
1225 | if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B))))) | |||
1226 | return BinaryOperator::CreateSub(A, B); | |||
1227 | ||||
1228 | // X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1) | |||
1229 | if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V); | |||
1230 | ||||
1231 | // A+B --> A|B iff A and B have no bits set in common. | |||
1232 | if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT)) | |||
1233 | return BinaryOperator::CreateOr(LHS, RHS); | |||
1234 | ||||
1235 | // FIXME: We already did a check for ConstantInt RHS above this. | |||
1236 | // FIXME: Is this pattern covered by another fold? No regression tests fail on | |||
1237 | // removal. | |||
1238 | if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) { | |||
1239 | // (X & FF00) + xx00 -> (X+xx00) & FF00 | |||
1240 | Value *X; | |||
1241 | ConstantInt *C2; | |||
1242 | if (LHS->hasOneUse() && | |||
1243 | match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) && | |||
1244 | CRHS->getValue() == (CRHS->getValue() & C2->getValue())) { | |||
1245 | // See if all bits from the first bit set in the Add RHS up are included | |||
1246 | // in the mask. First, get the rightmost bit. | |||
1247 | const APInt &AddRHSV = CRHS->getValue(); | |||
1248 | ||||
1249 | // Form a mask of all bits from the lowest bit added through the top. | |||
1250 | APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); | |||
1251 | ||||
1252 | // See if the and mask includes all of these bits. | |||
1253 | APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); | |||
1254 | ||||
1255 | if (AddRHSHighBits == AddRHSHighBitsAnd) { | |||
1256 | // Okay, the xform is safe. Insert the new add pronto. | |||
1257 | Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName()); | |||
1258 | return BinaryOperator::CreateAnd(NewAdd, C2); | |||
1259 | } | |||
1260 | } | |||
1261 | } | |||
1262 | ||||
1263 | // add (select X 0 (sub n A)) A --> select X A n | |||
1264 | { | |||
1265 | SelectInst *SI = dyn_cast<SelectInst>(LHS); | |||
1266 | Value *A = RHS; | |||
1267 | if (!SI) { | |||
1268 | SI = dyn_cast<SelectInst>(RHS); | |||
1269 | A = LHS; | |||
1270 | } | |||
1271 | if (SI && SI->hasOneUse()) { | |||
1272 | Value *TV = SI->getTrueValue(); | |||
1273 | Value *FV = SI->getFalseValue(); | |||
1274 | Value *N; | |||
1275 | ||||
1276 | // Can we fold the add into the argument of the select? | |||
1277 | // We check both true and false select arguments for a matching subtract. | |||
1278 | if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) | |||
1279 | // Fold the add into the true select value. | |||
1280 | return SelectInst::Create(SI->getCondition(), N, A); | |||
1281 | ||||
1282 | if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) | |||
1283 | // Fold the add into the false select value. | |||
1284 | return SelectInst::Create(SI->getCondition(), A, N); | |||
1285 | } | |||
1286 | } | |||
1287 | ||||
1288 | // Check for (add (sext x), y), see if we can merge this into an | |||
1289 | // integer add followed by a sext. | |||
1290 | if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { | |||
1291 | // (add (sext x), cst) --> (sext (add x, cst')) | |||
1292 | if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { | |||
1293 | if (LHSConv->hasOneUse()) { | |||
1294 | Constant *CI = | |||
1295 | ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); | |||
1296 | if (ConstantExpr::getSExt(CI, Ty) == RHSC && | |||
1297 | willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { | |||
1298 | // Insert the new, smaller add. | |||
1299 | Value *NewAdd = | |||
1300 | Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); | |||
1301 | return new SExtInst(NewAdd, Ty); | |||
1302 | } | |||
1303 | } | |||
1304 | } | |||
1305 | ||||
1306 | // (add (sext x), (sext y)) --> (sext (add int x, y)) | |||
1307 | if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { | |||
1308 | // Only do this if x/y have the same type, if at least one of them has a | |||
1309 | // single use (so we don't increase the number of sexts), and if the | |||
1310 | // integer add will not overflow. | |||
1311 | if (LHSConv->getOperand(0)->getType() == | |||
1312 | RHSConv->getOperand(0)->getType() && | |||
1313 | (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && | |||
1314 | willNotOverflowSignedAdd(LHSConv->getOperand(0), | |||
1315 | RHSConv->getOperand(0), I)) { | |||
1316 | // Insert the new integer add. | |||
1317 | Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0), | |||
1318 | RHSConv->getOperand(0), "addconv"); | |||
1319 | return new SExtInst(NewAdd, Ty); | |||
1320 | } | |||
1321 | } | |||
1322 | } | |||
1323 | ||||
1324 | // Check for (add (zext x), y), see if we can merge this into an | |||
1325 | // integer add followed by a zext. | |||
1326 | if (auto *LHSConv = dyn_cast<ZExtInst>(LHS)) { | |||
1327 | // (add (zext x), cst) --> (zext (add x, cst')) | |||
1328 | if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { | |||
1329 | if (LHSConv->hasOneUse()) { | |||
1330 | Constant *CI = | |||
1331 | ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); | |||
1332 | if (ConstantExpr::getZExt(CI, Ty) == RHSC && | |||
1333 | willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) { | |||
1334 | // Insert the new, smaller add. | |||
1335 | Value *NewAdd = | |||
1336 | Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); | |||
1337 | return new ZExtInst(NewAdd, Ty); | |||
1338 | } | |||
1339 | } | |||
1340 | } | |||
1341 | ||||
1342 | // (add (zext x), (zext y)) --> (zext (add int x, y)) | |||
1343 | if (auto *RHSConv = dyn_cast<ZExtInst>(RHS)) { | |||
1344 | // Only do this if x/y have the same type, if at least one of them has a | |||
1345 | // single use (so we don't increase the number of zexts), and if the | |||
1346 | // integer add will not overflow. | |||
1347 | if (LHSConv->getOperand(0)->getType() == | |||
1348 | RHSConv->getOperand(0)->getType() && | |||
1349 | (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && | |||
1350 | willNotOverflowUnsignedAdd(LHSConv->getOperand(0), | |||
1351 | RHSConv->getOperand(0), I)) { | |||
1352 | // Insert the new integer add. | |||
1353 | Value *NewAdd = Builder.CreateNUWAdd( | |||
1354 | LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); | |||
1355 | return new ZExtInst(NewAdd, Ty); | |||
1356 | } | |||
1357 | } | |||
1358 | } | |||
1359 | ||||
1360 | // (add (xor A, B) (and A, B)) --> (or A, B) | |||
1361 | // (add (and A, B) (xor A, B)) --> (or A, B) | |||
1362 | if (match(&I, m_c_BinOp(m_Xor(m_Value(A), m_Value(B)), | |||
1363 | m_c_And(m_Deferred(A), m_Deferred(B))))) | |||
1364 | return BinaryOperator::CreateOr(A, B); | |||
1365 | ||||
1366 | // (add (or A, B) (and A, B)) --> (add A, B) | |||
1367 | // (add (and A, B) (or A, B)) --> (add A, B) | |||
1368 | if (match(&I, m_c_BinOp(m_Or(m_Value(A), m_Value(B)), | |||
1369 | m_c_And(m_Deferred(A), m_Deferred(B))))) { | |||
1370 | I.setOperand(0, A); | |||
1371 | I.setOperand(1, B); | |||
1372 | return &I; | |||
1373 | } | |||
1374 | ||||
1375 | // TODO(jingyue): Consider willNotOverflowSignedAdd and | |||
1376 | // willNotOverflowUnsignedAdd to reduce the number of invocations of | |||
1377 | // computeKnownBits. | |||
1378 | bool Changed = false; | |||
1379 | if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) { | |||
1380 | Changed = true; | |||
1381 | I.setHasNoSignedWrap(true); | |||
1382 | } | |||
1383 | if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedAdd(LHS, RHS, I)) { | |||
1384 | Changed = true; | |||
1385 | I.setHasNoUnsignedWrap(true); | |||
1386 | } | |||
1387 | ||||
1388 | if (Instruction *V = canonicalizeLowbitMask(I, Builder)) | |||
1389 | return V; | |||
1390 | ||||
1391 | return Changed ? &I : nullptr; | |||
1392 | } | |||
1393 | ||||
1394 | Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { | |||
1395 | if (Value *V = SimplifyFAddInst(I.getOperand(0), I.getOperand(1), | |||
| ||||
1396 | I.getFastMathFlags(), | |||
1397 | SQ.getWithInstruction(&I))) | |||
1398 | return replaceInstUsesWith(I, V); | |||
1399 | ||||
1400 | if (SimplifyAssociativeOrCommutative(I)) | |||
1401 | return &I; | |||
1402 | ||||
1403 | if (Instruction *X = foldShuffledBinop(I)) | |||
1404 | return X; | |||
1405 | ||||
1406 | if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I)) | |||
1407 | return FoldedFAdd; | |||
1408 | ||||
1409 | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); | |||
1410 | Value *X; | |||
1411 | // (-X) + Y --> Y - X | |||
1412 | if (match(LHS, m_FNeg(m_Value(X)))) | |||
1413 | return BinaryOperator::CreateFSubFMF(RHS, X, &I); | |||
1414 | // Y + (-X) --> Y - X | |||
1415 | if (match(RHS, m_FNeg(m_Value(X)))) | |||
1416 | return BinaryOperator::CreateFSubFMF(LHS, X, &I); | |||
1417 | ||||
1418 | // Check for (fadd double (sitofp x), y), see if we can merge this into an | |||
1419 | // integer add followed by a promotion. | |||
1420 | if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { | |||
1421 | Value *LHSIntVal = LHSConv->getOperand(0); | |||
1422 | Type *FPType = LHSConv->getType(); | |||
1423 | ||||
1424 | // TODO: This check is overly conservative. In many cases known bits | |||
1425 | // analysis can tell us that the result of the addition has less significant | |||
1426 | // bits than the integer type can hold. | |||
1427 | auto IsValidPromotion = [](Type *FTy, Type *ITy) { | |||
1428 | Type *FScalarTy = FTy->getScalarType(); | |||
1429 | Type *IScalarTy = ITy->getScalarType(); | |||
1430 | ||||
1431 | // Do we have enough bits in the significand to represent the result of | |||
1432 | // the integer addition? | |||
1433 | unsigned MaxRepresentableBits = | |||
1434 | APFloat::semanticsPrecision(FScalarTy->getFltSemantics()); | |||
1435 | return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits; | |||
1436 | }; | |||
1437 | ||||
1438 | // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) | |||
1439 | // ... if the constant fits in the integer value. This is useful for things | |||
1440 | // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer | |||
1441 | // requires a constant pool load, and generally allows the add to be better | |||
1442 | // instcombined. | |||
1443 | if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) | |||
1444 | if (IsValidPromotion(FPType, LHSIntVal->getType())) { | |||
1445 | Constant *CI = | |||
1446 | ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); | |||
1447 | if (LHSConv->hasOneUse() && | |||
1448 | ConstantExpr::getSIToFP(CI, I.getType()) == CFP && | |||
1449 | willNotOverflowSignedAdd(LHSIntVal, CI, I)) { | |||
1450 | // Insert the new integer add. | |||
1451 | Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv"); | |||
1452 | return new SIToFPInst(NewAdd, I.getType()); | |||
1453 | } | |||
1454 | } | |||
1455 | ||||
1456 | // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) | |||
1457 | if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { | |||
1458 | Value *RHSIntVal = RHSConv->getOperand(0); | |||
1459 | // It's enough to check LHS types only because we require int types to | |||
1460 | // be the same for this transform. | |||
1461 | if (IsValidPromotion(FPType, LHSIntVal->getType())) { | |||
1462 | // Only do this if x/y have the same type, if at least one of them has a | |||
1463 | // single use (so we don't increase the number of int->fp conversions), | |||
1464 | // and if the integer add will not overflow. | |||
1465 | if (LHSIntVal->getType() == RHSIntVal->getType() && | |||
1466 | (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && | |||
1467 | willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { | |||
1468 | // Insert the new integer add. | |||
1469 | Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv"); | |||
1470 | return new SIToFPInst(NewAdd, I.getType()); | |||
1471 | } | |||
1472 | } | |||
1473 | } | |||
1474 | } | |||
1475 | ||||
1476 | // Handle specials cases for FAdd with selects feeding the operation | |||
1477 | if (Value *V = SimplifySelectsFeedingBinaryOp(I, LHS, RHS)) | |||
1478 | return replaceInstUsesWith(I, V); | |||
1479 | ||||
1480 | if (I.hasAllowReassoc() && I.hasNoSignedZeros()) { | |||
1481 | if (Value *V = FAddCombine(Builder).simplify(&I)) | |||
1482 | return replaceInstUsesWith(I, V); | |||
1483 | } | |||
1484 | ||||
1485 | return nullptr; | |||
1486 | } | |||
1487 | ||||
1488 | /// Optimize pointer differences into the same array into a size. Consider: | |||
1489 | /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer | |||
1490 | /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. | |||
1491 | Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, | |||
1492 | Type *Ty) { | |||
1493 | // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize | |||
1494 | // this. | |||
1495 | bool Swapped = false; | |||
1496 | GEPOperator *GEP1 = nullptr, *GEP2 = nullptr; | |||
1497 | ||||
1498 | // For now we require one side to be the base pointer "A" or a constant | |||
1499 | // GEP derived from it. | |||
1500 | if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { | |||
1501 | // (gep X, ...) - X | |||
1502 | if (LHSGEP->getOperand(0) == RHS) { | |||
1503 | GEP1 = LHSGEP; | |||
1504 | Swapped = false; | |||
1505 | } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { | |||
1506 | // (gep X, ...) - (gep X, ...) | |||
1507 | if (LHSGEP->getOperand(0)->stripPointerCasts() == | |||
1508 | RHSGEP->getOperand(0)->stripPointerCasts()) { | |||
1509 | GEP2 = RHSGEP; | |||
1510 | GEP1 = LHSGEP; | |||
1511 | Swapped = false; | |||
1512 | } | |||
1513 | } | |||
1514 | } | |||
1515 | ||||
1516 | if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { | |||
1517 | // X - (gep X, ...) | |||
1518 | if (RHSGEP->getOperand(0) == LHS) { | |||
1519 | GEP1 = RHSGEP; | |||
1520 | Swapped = true; | |||
1521 | } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { | |||
1522 | // (gep X, ...) - (gep X, ...) | |||
1523 | if (RHSGEP->getOperand(0)->stripPointerCasts() == | |||
1524 | LHSGEP->getOperand(0)->stripPointerCasts()) { | |||
1525 | GEP2 = LHSGEP; | |||
1526 | GEP1 = RHSGEP; | |||
1527 | Swapped = true; | |||
1528 | } | |||
1529 | } | |||
1530 | } | |||
1531 | ||||
1532 | if (!GEP1) | |||
1533 | // No GEP found. | |||
1534 | return nullptr; | |||
1535 | ||||
1536 | if (GEP2) { | |||
1537 | // (gep X, ...) - (gep X, ...) | |||
1538 | // | |||
1539 | // Avoid duplicating the arithmetic if there are more than one non-constant | |||
1540 | // indices between the two GEPs and either GEP has a non-constant index and | |||
1541 | // multiple users. If zero non-constant index, the result is a constant and | |||
1542 | // there is no duplication. If one non-constant index, the result is an add | |||
1543 | // or sub with a constant, which is no larger than the original code, and | |||
1544 | // there's no duplicated arithmetic, even if either GEP has multiple | |||
1545 | // users. If more than one non-constant indices combined, as long as the GEP | |||
1546 | // with at least one non-constant index doesn't have multiple users, there | |||
1547 | // is no duplication. | |||
1548 | unsigned NumNonConstantIndices1 = GEP1->countNonConstantIndices(); | |||
1549 | unsigned NumNonConstantIndices2 = GEP2->countNonConstantIndices(); | |||
1550 | if (NumNonConstantIndices1 + NumNonConstantIndices2 > 1 && | |||
1551 | ((NumNonConstantIndices1 > 0 && !GEP1->hasOneUse()) || | |||
1552 | (NumNonConstantIndices2 > 0 && !GEP2->hasOneUse()))) { | |||
1553 | return nullptr; | |||
1554 | } | |||
1555 | } | |||
1556 | ||||
1557 | // Emit the offset of the GEP and an intptr_t. | |||
1558 | Value *Result = EmitGEPOffset(GEP1); | |||
1559 | ||||
1560 | // If we had a constant expression GEP on the other side offsetting the | |||
1561 | // pointer, subtract it from the offset we have. | |||
1562 | if (GEP2) { | |||
1563 | Value *Offset = EmitGEPOffset(GEP2); | |||
1564 | Result = Builder.CreateSub(Result, Offset); | |||
1565 | } | |||
1566 | ||||
1567 | // If we have p - gep(p, ...) then we have to negate the result. | |||
1568 | if (Swapped) | |||
1569 | Result = Builder.CreateNeg(Result, "diff.neg"); | |||
1570 | ||||
1571 | return Builder.CreateIntCast(Result, Ty, true); | |||
1572 | } | |||
1573 | ||||
1574 | Instruction *InstCombiner::visitSub(BinaryOperator &I) { | |||
1575 | if (Value *V = SimplifySubInst(I.getOperand(0), I.getOperand(1), | |||
1576 | I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), | |||
1577 | SQ.getWithInstruction(&I))) | |||
1578 | return replaceInstUsesWith(I, V); | |||
1579 | ||||
1580 | if (Instruction *X = foldShuffledBinop(I)) | |||
1581 | return X; | |||
1582 | ||||
1583 | // (A*B)-(A*C) -> A*(B-C) etc | |||
1584 | if (Value *V = SimplifyUsingDistributiveLaws(I)) | |||
1585 | return replaceInstUsesWith(I, V); | |||
1586 | ||||
1587 | // If this is a 'B = x-(-A)', change to B = x+A. | |||
1588 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | |||
1589 | if (Value *V = dyn_castNegVal(Op1)) { | |||
1590 | BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); | |||
1591 | ||||
1592 | if (const auto *BO = dyn_cast<BinaryOperator>(Op1)) { | |||
1593 | assert(BO->getOpcode() == Instruction::Sub &&(static_cast <bool> (BO->getOpcode() == Instruction:: Sub && "Expected a subtraction operator!") ? void (0) : __assert_fail ("BO->getOpcode() == Instruction::Sub && \"Expected a subtraction operator!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 1594, __extension__ __PRETTY_FUNCTION__)) | |||
1594 | "Expected a subtraction operator!")(static_cast <bool> (BO->getOpcode() == Instruction:: Sub && "Expected a subtraction operator!") ? void (0) : __assert_fail ("BO->getOpcode() == Instruction::Sub && \"Expected a subtraction operator!\"" , "/build/llvm-toolchain-snapshot-7~svn338205/lib/Transforms/InstCombine/InstCombineAddSub.cpp" , 1594, __extension__ __PRETTY_FUNCTION__)); | |||
1595 | if (BO->hasNoSignedWrap() && I.hasNoSignedWrap()) | |||
1596 | Res->setHasNoSignedWrap(true); | |||
1597 | } else { | |||
1598 | if (cast<Constant>(Op1)->isNotMinSignedValue() && I.hasNoSignedWrap()) | |||
1599 | Res->setHasNoSignedWrap(true); | |||
1600 | } | |||
1601 | ||||
1602 | return Res; | |||
1603 | } | |||
1604 | ||||
1605 | if (I.getType()->isIntOrIntVectorTy(1)) | |||
1606 | return BinaryOperator::CreateXor(Op0, Op1); | |||
1607 | ||||
1608 | // Replace (-1 - A) with (~A). | |||
1609 | if (match(Op0, m_AllOnes())) | |||
1610 | return BinaryOperator::CreateNot(Op1); | |||
1611 | ||||
1612 | // (~X) - (~Y) --> Y - X | |||
1613 | Value *X, *Y; | |||
1614 | if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y)))) | |||
1615 | return BinaryOperator::CreateSub(Y, X); | |||
1616 | ||||
1617 | if (Constant *C = dyn_cast<Constant>(Op0)) { | |||
1618 | bool IsNegate = match(C, m_ZeroInt()); | |||
1619 | Value *X; | |||
1620 | if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { | |||
1621 | // 0 - (zext bool) --> sext bool | |||
1622 | // C - (zext bool) --> bool ? C - 1 : C | |||
1623 | if (IsNegate) | |||
1624 | return CastInst::CreateSExtOrBitCast(X, I.getType()); | |||
1625 | return SelectInst::Create(X, SubOne(C), C); | |||
1626 | } | |||
1627 | if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { | |||
1628 | // 0 - (sext bool) --> zext bool | |||
1629 | // C - (sext bool) --> bool ? C + 1 : C | |||
1630 | if (IsNegate) | |||
1631 | return CastInst::CreateZExtOrBitCast(X, I.getType()); | |||
1632 | return SelectInst::Create(X, AddOne(C), C); | |||
1633 | } | |||
1634 | ||||
1635 | // C - ~X == X + (1+C) | |||
1636 | if (match(Op1, m_Not(m_Value(X)))) | |||
1637 | return BinaryOperator::CreateAdd(X, AddOne(C)); | |||
1638 | ||||
1639 | // Try to fold constant sub into select arguments. | |||
1640 | if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) | |||
1641 | if (Instruction *R = FoldOpIntoSelect(I, SI)) | |||
1642 | return R; | |||
1643 | ||||
1644 | // Try to fold constant sub into PHI values. | |||
1645 | if (PHINode *PN = dyn_cast<PHINode>(Op1)) | |||
1646 | if (Instruction *R = foldOpIntoPhi(I, PN)) | |||
1647 | return R; | |||
1648 | ||||
1649 | // C-(X+C2) --> (C-C2)-X | |||
1650 | Constant *C2; | |||
1651 | if (match(Op1, m_Add(m_Value(X), m_Constant(C2)))) | |||
1652 | return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); | |||
1653 | } | |||
1654 | ||||
1655 | const APInt *Op0C; | |||
1656 | if (match(Op0, m_APInt(Op0C))) { | |||
1657 | unsigned BitWidth = I.getType()->getScalarSizeInBits(); | |||
1658 | ||||
1659 | // -(X >>u 31) -> (X >>s 31) | |||
1660 | // -(X >>s 31) -> (X >>u 31) | |||
1661 | if (Op0C->isNullValue()) { | |||
1662 | Value *X; | |||
1663 | const APInt *ShAmt; | |||
1664 | if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) && | |||
1665 | *ShAmt == BitWidth - 1) { | |||
1666 | Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1); | |||
1667 | return BinaryOperator::CreateAShr(X, ShAmtOp); | |||
1668 | } | |||
1669 | if (match(Op1, m_AShr(m_Value(X), m_APInt(ShAmt))) && | |||
1670 | *ShAmt == BitWidth - 1) { | |||
1671 | Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1); | |||
1672 | return BinaryOperator::CreateLShr(X, ShAmtOp); | |||
1673 | } | |||
1674 | ||||
1675 | if (Op1->hasOneUse()) { | |||
1676 | Value *LHS, *RHS; | |||
1677 | SelectPatternFlavor SPF = matchSelectPattern(Op1, LHS, RHS).Flavor; | |||
1678 | if (SPF == SPF_ABS || SPF == SPF_NABS) { | |||
1679 | // This is a negate of an ABS/NABS pattern. Just swap the operands | |||
1680 | // of the select. | |||
1681 | SelectInst *SI = cast<SelectInst>(Op1); | |||
1682 | Value *TrueVal = SI->getTrueValue(); | |||
1683 | Value *FalseVal = SI->getFalseValue(); | |||
1684 | SI->setTrueValue(FalseVal); | |||
1685 | SI->setFalseValue(TrueVal); | |||
1686 | // Don't swap prof metadata, we didn't change the branch behavior. | |||
1687 | return replaceInstUsesWith(I, SI); | |||
1688 | } | |||
1689 | } | |||
1690 | } | |||
1691 | ||||
1692 | // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known | |||
1693 | // zero. | |||
1694 | if (Op0C->isMask()) { | |||
1695 | KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); | |||
1696 | if ((*Op0C | RHSKnown.Zero).isAllOnesValue()) | |||
1697 | return BinaryOperator::CreateXor(Op1, Op0); | |||
1698 | } | |||
1699 | } | |||
1700 | ||||
1701 | { | |||
1702 | Value *Y; | |||
1703 | // X-(X+Y) == -Y X-(Y+X) == -Y | |||
1704 | if (match(Op1, m_c_Add(m_Specific(Op0), m_Value(Y)))) | |||
1705 | return BinaryOperator::CreateNeg(Y); | |||
1706 | ||||
1707 | // (X-Y)-X == -Y | |||
1708 | if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) | |||
1709 | return BinaryOperator::CreateNeg(Y); | |||
1710 | } | |||
1711 | ||||
1712 | // (sub (or A, B), (xor A, B)) --> (and A, B) | |||
1713 | { | |||
1714 | Value *A, *B; | |||
1715 | if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && | |||
1716 | match(Op0, m_c_Or(m_Specific(A), m_Specific(B)))) | |||
1717 | return BinaryOperator::CreateAnd(A, B); | |||
1718 | } | |||
1719 | ||||
1720 | { | |||
1721 | Value *Y; | |||
1722 | // ((X | Y) - X) --> (~X & Y) | |||
1723 | if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1))))) | |||
1724 | return BinaryOperator::CreateAnd( | |||
1725 | Y, Builder.CreateNot(Op1, Op1->getName() + ".not")); | |||
1726 | } | |||
1727 | ||||
1728 | if (Op1->hasOneUse()) { | |||
1729 | Value *X = nullptr, *Y = nullptr, *Z = nullptr; | |||
1730 | Constant *C = nullptr; | |||
1731 | ||||
1732 | // (X - (Y - Z)) --> (X + (Z - Y)). | |||
1733 | if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) | |||
1734 | return BinaryOperator::CreateAdd(Op0, | |||
1735 | Builder.CreateSub(Z, Y, Op1->getName())); | |||
1736 | ||||
1737 | // (X - (X & Y)) --> (X & ~Y) | |||
1738 | if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0)))) | |||
1739 | return BinaryOperator::CreateAnd(Op0, | |||
1740 | Builder.CreateNot(Y, Y->getName() + ".not")); | |||
1741 | ||||
1742 | // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow. | |||
1743 | if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) && | |||
1744 | C->isNotMinSignedValue() && !C->isOneValue()) | |||
1745 | return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); | |||
1746 | ||||
1747 | // 0 - (X << Y) -> (-X << Y) when X is freely negatable. | |||
1748 | if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero())) | |||
1749 | if (Value *XNeg = dyn_castNegVal(X)) | |||
1750 | return BinaryOperator::CreateShl(XNeg, Y); | |||
1751 | ||||
1752 | // Subtracting -1/0 is the same as adding 1/0: | |||
1753 | // sub [nsw] Op0, sext(bool Y) -> add [nsw] Op0, zext(bool Y) | |||
1754 | // 'nuw' is dropped in favor of the canonical form. | |||
1755 | if (match(Op1, m_SExt(m_Value(Y))) && | |||
1756 | Y->getType()->getScalarSizeInBits() == 1) { | |||
1757 | Value *Zext = Builder.CreateZExt(Y, I.getType()); | |||
1758 | BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext); | |||
1759 | Add->setHasNoSignedWrap(I.hasNoSignedWrap()); | |||
1760 | return Add; | |||
1761 | } | |||
1762 | ||||
1763 | // X - A*-B -> X + A*B | |||
1764 | // X - -A*B -> X + A*B | |||
1765 | Value *A, *B; | |||
1766 | Constant *CI; | |||
1767 | if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B))))) | |||
1768 | return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B)); | |||
1769 | ||||
1770 | // X - A*CI -> X + A*-CI | |||
1771 | // No need to handle commuted multiply because multiply handling will | |||
1772 | // ensure constant will be move to the right hand side. | |||
1773 | if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) { | |||
1774 | Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI)); | |||
1775 | return BinaryOperator::CreateAdd(Op0, NewMul); | |||
1776 | } | |||
1777 | } | |||
1778 | ||||
1779 | // Optimize pointer differences into the same array into a size. Consider: | |||
1780 | // &A[10] - &A[0]: we should compile this to "10". | |||
1781 | Value *LHSOp, *RHSOp; | |||
1782 | if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && | |||
1783 | match(Op1, m_PtrToInt(m_Value(RHSOp)))) | |||
1784 | if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) | |||
1785 | return replaceInstUsesWith(I, Res); | |||
1786 | ||||
1787 | // trunc(p)-trunc(q) -> trunc(p-q) | |||
1788 | if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && | |||
1789 | match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) | |||
1790 | if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) | |||
1791 | return replaceInstUsesWith(I, Res); | |||
1792 | ||||
1793 | // Canonicalize a shifty way to code absolute value to the common pattern. | |||
1794 | // There are 2 potential commuted variants. | |||
1795 | // We're relying on the fact that we only do this transform when the shift has | |||
1796 | // exactly 2 uses and the xor has exactly 1 use (otherwise, we might increase | |||
1797 | // instructions). | |||
1798 | Value *A; | |||
1799 | const APInt *ShAmt; | |||
1800 | Type *Ty = I.getType(); | |||
1801 | if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) && | |||
1802 | Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 && | |||
1803 | match(Op0, m_OneUse(m_c_Xor(m_Specific(A), m_Specific(Op1))))) { | |||
1804 | // B = ashr i32 A, 31 ; smear the sign bit | |||
1805 | // sub (xor A, B), B ; flip bits if negative and subtract -1 (add 1) | |||
1806 | // --> (A < 0) ? -A : A | |||
1807 | Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty)); | |||
1808 | // Copy the nuw/nsw flags from the sub to the negate. | |||
1809 | Value *Neg = Builder.CreateNeg(A, "", I.hasNoUnsignedWrap(), | |||
1810 | I.hasNoSignedWrap()); | |||
1811 | return SelectInst::Create(Cmp, Neg, A); | |||
1812 | } | |||
1813 | ||||
1814 | bool Changed = false; | |||
1815 | if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) { | |||
1816 | Changed = true; | |||
1817 | I.setHasNoSignedWrap(true); | |||
1818 | } | |||
1819 | if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) { | |||
1820 | Changed = true; | |||
1821 | I.setHasNoUnsignedWrap(true); | |||
1822 | } | |||
1823 | ||||
1824 | return Changed ? &I : nullptr; | |||
1825 | } | |||
1826 | ||||
1827 | Instruction *InstCombiner::visitFSub(BinaryOperator &I) { | |||
1828 | if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1), | |||
1829 | I.getFastMathFlags(), | |||
1830 | SQ.getWithInstruction(&I))) | |||
1831 | return replaceInstUsesWith(I, V); | |||
1832 | ||||
1833 | if (Instruction *X = foldShuffledBinop(I)) | |||
1834 | return X; | |||
1835 | ||||
1836 | // Subtraction from -0.0 is the canonical form of fneg. | |||
1837 | // fsub nsz 0, X ==> fsub nsz -0.0, X | |||
1838 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | |||
1839 | if (I.hasNoSignedZeros() && match(Op0, m_PosZeroFP())) | |||
1840 | return BinaryOperator::CreateFNegFMF(Op1, &I); | |||
1841 | ||||
1842 | // If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X) | |||
1843 | // Canonicalize to fadd to make analysis easier. | |||
1844 | // This can also help codegen because fadd is commutative. | |||
1845 | // Note that if this fsub was really an fneg, the fadd with -0.0 will get | |||
1846 | // killed later. We still limit that particular transform with 'hasOneUse' | |||
1847 | // because an fneg is assumed better/cheaper than a generic fsub. | |||
1848 | Value *X, *Y; | |||
1849 | if (I.hasNoSignedZeros() || CannotBeNegativeZero(Op0, SQ.TLI)) { | |||
1850 | if (match(Op1, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) { | |||
1851 | Value *NewSub = Builder.CreateFSubFMF(Y, X, &I); | |||
1852 | return BinaryOperator::CreateFAddFMF(Op0, NewSub, &I); | |||
1853 | } | |||
1854 | } | |||
1855 | ||||
1856 | if (isa<Constant>(Op0)) | |||
1857 | if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) | |||
1858 | if (Instruction *NV = FoldOpIntoSelect(I, SI)) | |||
1859 | return NV; | |||
1860 | ||||
1861 | // X - C --> X + (-C) | |||
1862 | // But don't transform constant expressions because there's an inverse fold | |||
1863 | // for X + (-Y) --> X - Y. | |||
1864 | Constant *C; | |||
1865 | if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1)) | |||
1866 | return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I); | |||
1867 | ||||
1868 | // X - (-Y) --> X + Y | |||
1869 | if (match(Op1, m_FNeg(m_Value(Y)))) | |||
1870 | return BinaryOperator::CreateFAddFMF(Op0, Y, &I); | |||
1871 | ||||
1872 | // Similar to above, but look through a cast of the negated value: | |||
1873 | // X - (fptrunc(-Y)) --> X + fptrunc(Y) | |||
1874 | if (match(Op1, m_OneUse(m_FPTrunc(m_FNeg(m_Value(Y)))))) { | |||
1875 | Value *TruncY = Builder.CreateFPTrunc(Y, I.getType()); | |||
1876 | return BinaryOperator::CreateFAddFMF(Op0, TruncY, &I); | |||
1877 | } | |||
1878 | // X - (fpext(-Y)) --> X + fpext(Y) | |||
1879 | if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y)))))) { | |||
1880 | Value *ExtY = Builder.CreateFPExt(Y, I.getType()); | |||
1881 | return BinaryOperator::CreateFAddFMF(Op0, ExtY, &I); | |||
1882 | } | |||
1883 | ||||
1884 | // Handle specials cases for FSub with selects feeding the operation | |||
1885 | if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1)) | |||
1886 | return replaceInstUsesWith(I, V); | |||
1887 | ||||
1888 | if (I.hasAllowReassoc() && I.hasNoSignedZeros()) { | |||
1889 | if (Value *V = FAddCombine(Builder).simplify(&I)) | |||
1890 | return replaceInstUsesWith(I, V); | |||
1891 | } | |||
1892 | ||||
1893 | return nullptr; | |||
1894 | } |