LLVM 19.0.0git
Reassociate.cpp
Go to the documentation of this file.
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass reassociates commutative expressions in an order that is designed
10// to promote better constant propagation, GCSE, LICM, PRE, etc.
11//
12// For example: 4 + (x + 5) -> x + (4 + 5)
13//
14// In the implementation of this algorithm, constants are assigned rank = 0,
15// function arguments are rank = 1, and other values are assigned ranks
16// corresponding to the reverse post order traversal of current function
17// (starting at 2), which effectively gives values in deep loops higher rank
18// than values not in loops.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Operator.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/IR/ValueHandle.h"
53#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <utility>
63
64using namespace llvm;
65using namespace reassociate;
66using namespace PatternMatch;
67
68#define DEBUG_TYPE "reassociate"
69
70STATISTIC(NumChanged, "Number of insts reassociated");
71STATISTIC(NumAnnihil, "Number of expr tree annihilated");
72STATISTIC(NumFactor , "Number of multiplies factored");
73
74static cl::opt<bool>
75 UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
76 cl::desc("Only reorder expressions within a basic block "
77 "when exposing CSE opportunities"),
78 cl::init(true), cl::Hidden);
79
80#ifndef NDEBUG
81/// Print out the expression identified in the Ops list.
83 Module *M = I->getModule();
84 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
85 << *Ops[0].Op->getType() << '\t';
86 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
87 dbgs() << "[ ";
88 Ops[i].Op->printAsOperand(dbgs(), false, M);
89 dbgs() << ", #" << Ops[i].Rank << "] ";
90 }
91}
92#endif
93
94/// Utility class representing a non-constant Xor-operand. We classify
95/// non-constant Xor-Operands into two categories:
96/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
97/// C2)
98/// C2.1) The operand is in the form of "X | C", where C is a non-zero
99/// constant.
100/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
101/// operand as "E | 0"
103public:
104 XorOpnd(Value *V);
105
106 bool isInvalid() const { return SymbolicPart == nullptr; }
107 bool isOrExpr() const { return isOr; }
108 Value *getValue() const { return OrigVal; }
109 Value *getSymbolicPart() const { return SymbolicPart; }
110 unsigned getSymbolicRank() const { return SymbolicRank; }
111 const APInt &getConstPart() const { return ConstPart; }
112
113 void Invalidate() { SymbolicPart = OrigVal = nullptr; }
114 void setSymbolicRank(unsigned R) { SymbolicRank = R; }
115
116private:
117 Value *OrigVal;
118 Value *SymbolicPart;
119 APInt ConstPart;
120 unsigned SymbolicRank;
121 bool isOr;
122};
123
125 assert(!isa<ConstantInt>(V) && "No ConstantInt");
126 OrigVal = V;
127 Instruction *I = dyn_cast<Instruction>(V);
128 SymbolicRank = 0;
129
130 if (I && (I->getOpcode() == Instruction::Or ||
131 I->getOpcode() == Instruction::And)) {
132 Value *V0 = I->getOperand(0);
133 Value *V1 = I->getOperand(1);
134 const APInt *C;
135 if (match(V0, m_APInt(C)))
136 std::swap(V0, V1);
137
138 if (match(V1, m_APInt(C))) {
139 ConstPart = *C;
140 SymbolicPart = V0;
141 isOr = (I->getOpcode() == Instruction::Or);
142 return;
143 }
144 }
145
146 // view the operand as "V | 0"
147 SymbolicPart = V;
148 ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
149 isOr = true;
150}
151
152/// Return true if I is an instruction with the FastMathFlags that are needed
153/// for general reassociation set. This is not the same as testing
154/// Instruction::isAssociative() because it includes operations like fsub.
155/// (This routine is only intended to be called for floating-point operations.)
157 assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
158 return I->hasAllowReassoc() && I->hasNoSignedZeros();
159}
160
161/// Return true if V is an instruction of the specified opcode and if it
162/// only has one use.
163static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
164 auto *BO = dyn_cast<BinaryOperator>(V);
165 if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
166 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
167 return BO;
168 return nullptr;
169}
170
171static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
172 unsigned Opcode2) {
173 auto *BO = dyn_cast<BinaryOperator>(V);
174 if (BO && BO->hasOneUse() &&
175 (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
176 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
177 return BO;
178 return nullptr;
179}
180
181void ReassociatePass::BuildRankMap(Function &F,
183 unsigned Rank = 2;
184
185 // Assign distinct ranks to function arguments.
186 for (auto &Arg : F.args()) {
187 ValueRankMap[&Arg] = ++Rank;
188 LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
189 << "\n");
190 }
191
192 // Traverse basic blocks in ReversePostOrder.
193 for (BasicBlock *BB : RPOT) {
194 unsigned BBRank = RankMap[BB] = ++Rank << 16;
195
196 // Walk the basic block, adding precomputed ranks for any instructions that
197 // we cannot move. This ensures that the ranks for these instructions are
198 // all different in the block.
199 for (Instruction &I : *BB)
201 ValueRankMap[&I] = ++BBRank;
202 }
203}
204
205unsigned ReassociatePass::getRank(Value *V) {
206 Instruction *I = dyn_cast<Instruction>(V);
207 if (!I) {
208 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
209 return 0; // Otherwise it's a global or constant, rank 0.
210 }
211
212 if (unsigned Rank = ValueRankMap[I])
213 return Rank; // Rank already known?
214
215 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
216 // we can reassociate expressions for code motion! Since we do not recurse
217 // for PHI nodes, we cannot have infinite recursion here, because there
218 // cannot be loops in the value graph that do not go through PHI nodes.
219 unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
220 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
221 Rank = std::max(Rank, getRank(I->getOperand(i)));
222
223 // If this is a 'not' or 'neg' instruction, do not count it for rank. This
224 // assures us that X and ~X will have the same rank.
225 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
226 !match(I, m_FNeg(m_Value())))
227 ++Rank;
228
229 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
230 << "\n");
231
232 return ValueRankMap[I] = Rank;
233}
234
235// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
236void ReassociatePass::canonicalizeOperands(Instruction *I) {
237 assert(isa<BinaryOperator>(I) && "Expected binary operator.");
238 assert(I->isCommutative() && "Expected commutative operator.");
239
240 Value *LHS = I->getOperand(0);
241 Value *RHS = I->getOperand(1);
242 if (LHS == RHS || isa<Constant>(RHS))
243 return;
244 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
245 cast<BinaryOperator>(I)->swapOperands();
246}
247
249 Instruction *InsertBefore, Value *FlagsOp) {
250 if (S1->getType()->isIntOrIntVectorTy())
251 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
252 else {
253 BinaryOperator *Res =
254 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
255 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
256 return Res;
257 }
258}
259
261 Instruction *InsertBefore, Value *FlagsOp) {
262 if (S1->getType()->isIntOrIntVectorTy())
263 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
264 else {
265 BinaryOperator *Res =
266 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
267 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
268 return Res;
269 }
270}
271
273 Instruction *InsertBefore, Value *FlagsOp) {
274 if (S1->getType()->isIntOrIntVectorTy())
275 return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
276
277 if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
278 return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
279
280 return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
281}
282
283/// Replace 0-X with X*-1.
285 assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
286 "Expected a Negate!");
287 // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
288 unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
289 Type *Ty = Neg->getType();
290 Constant *NegOne = Ty->isIntOrIntVectorTy() ?
291 ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
292
293 BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
294 Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
295 Res->takeName(Neg);
296 Neg->replaceAllUsesWith(Res);
297 Res->setDebugLoc(Neg->getDebugLoc());
298 return Res;
299}
300
301/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
302/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
303/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
304/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
305/// even x in Bitwidth-bit arithmetic.
306static unsigned CarmichaelShift(unsigned Bitwidth) {
307 if (Bitwidth < 3)
308 return Bitwidth - 1;
309 return Bitwidth - 2;
310}
311
312/// Add the extra weight 'RHS' to the existing weight 'LHS',
313/// reducing the combined weight using any special properties of the operation.
314/// The existing weight LHS represents the computation X op X op ... op X where
315/// X occurs LHS times. The combined weight represents X op X op ... op X with
316/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
317/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
318/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
319static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
320 // If we were working with infinite precision arithmetic then the combined
321 // weight would be LHS + RHS. But we are using finite precision arithmetic,
322 // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
323 // for nilpotent operations and addition, but not for idempotent operations
324 // and multiplication), so it is important to correctly reduce the combined
325 // weight back into range if wrapping would be wrong.
326
327 // If RHS is zero then the weight didn't change.
328 if (RHS.isMinValue())
329 return;
330 // If LHS is zero then the combined weight is RHS.
331 if (LHS.isMinValue()) {
332 LHS = RHS;
333 return;
334 }
335 // From this point on we know that neither LHS nor RHS is zero.
336
337 if (Instruction::isIdempotent(Opcode)) {
338 // Idempotent means X op X === X, so any non-zero weight is equivalent to a
339 // weight of 1. Keeping weights at zero or one also means that wrapping is
340 // not a problem.
341 assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
342 return; // Return a weight of 1.
343 }
344 if (Instruction::isNilpotent(Opcode)) {
345 // Nilpotent means X op X === 0, so reduce weights modulo 2.
346 assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
347 LHS = 0; // 1 + 1 === 0 modulo 2.
348 return;
349 }
350 if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
351 // TODO: Reduce the weight by exploiting nsw/nuw?
352 LHS += RHS;
353 return;
354 }
355
356 assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
357 "Unknown associative operation!");
358 unsigned Bitwidth = LHS.getBitWidth();
359 // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
360 // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
361 // bit number x, since either x is odd in which case x^CM = 1, or x is even in
362 // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
363 // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
364 // which by a happy accident means that they can always be represented using
365 // Bitwidth bits.
366 // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
367 // the Carmichael number).
368 if (Bitwidth > 3) {
369 /// CM - The value of Carmichael's lambda function.
370 APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
371 // Any weight W >= Threshold can be replaced with W - CM.
372 APInt Threshold = CM + Bitwidth;
373 assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
374 // For Bitwidth 4 or more the following sum does not overflow.
375 LHS += RHS;
376 while (LHS.uge(Threshold))
377 LHS -= CM;
378 } else {
379 // To avoid problems with overflow do everything the same as above but using
380 // a larger type.
381 unsigned CM = 1U << CarmichaelShift(Bitwidth);
382 unsigned Threshold = CM + Bitwidth;
383 assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
384 "Weights not reduced!");
385 unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
386 while (Total >= Threshold)
387 Total -= CM;
388 LHS = Total;
389 }
390}
391
392using RepeatedValue = std::pair<Value*, APInt>;
393
394/// Given an associative binary expression, return the leaf
395/// nodes in Ops along with their weights (how many times the leaf occurs). The
396/// original expression is the same as
397/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
398/// op
399/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
400/// op
401/// ...
402/// op
403/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
404///
405/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
406///
407/// This routine may modify the function, in which case it returns 'true'. The
408/// changes it makes may well be destructive, changing the value computed by 'I'
409/// to something completely different. Thus if the routine returns 'true' then
410/// you MUST either replace I with a new expression computed from the Ops array,
411/// or use RewriteExprTree to put the values back in.
412///
413/// A leaf node is either not a binary operation of the same kind as the root
414/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
415/// opcode), or is the same kind of binary operator but has a use which either
416/// does not belong to the expression, or does belong to the expression but is
417/// a leaf node. Every leaf node has at least one use that is a non-leaf node
418/// of the expression, while for non-leaf nodes (except for the root 'I') every
419/// use is a non-leaf node of the expression.
420///
421/// For example:
422/// expression graph node names
423///
424/// + | I
425/// / \ |
426/// + + | A, B
427/// / \ / \ |
428/// * + * | C, D, E
429/// / \ / \ / \ |
430/// + * | F, G
431///
432/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
433/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
434///
435/// The expression is maximal: if some instruction is a binary operator of the
436/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
437/// then the instruction also belongs to the expression, is not a leaf node of
438/// it, and its operands also belong to the expression (but may be leaf nodes).
439///
440/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
441/// order to ensure that every non-root node in the expression has *exactly one*
442/// use by a non-leaf node of the expression. This destruction means that the
443/// caller MUST either replace 'I' with a new expression or use something like
444/// RewriteExprTree to put the values back in if the routine indicates that it
445/// made a change by returning 'true'.
446///
447/// In the above example either the right operand of A or the left operand of B
448/// will be replaced by undef. If it is B's operand then this gives:
449///
450/// + | I
451/// / \ |
452/// + + | A, B - operand of B replaced with undef
453/// / \ \ |
454/// * + * | C, D, E
455/// / \ / \ / \ |
456/// + * | F, G
457///
458/// Note that such undef operands can only be reached by passing through 'I'.
459/// For example, if you visit operands recursively starting from a leaf node
460/// then you will never see such an undef operand unless you get back to 'I',
461/// which requires passing through a phi node.
462///
463/// Note that this routine may also mutate binary operators of the wrong type
464/// that have all uses inside the expression (i.e. only used by non-leaf nodes
465/// of the expression) if it can turn them into binary operators of the right
466/// type and thus make the expression bigger.
470 bool &HasNUW) {
471 assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
472 "Expected a UnaryOperator or BinaryOperator!");
473 LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
474 unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
475 unsigned Opcode = I->getOpcode();
476 assert(I->isAssociative() && I->isCommutative() &&
477 "Expected an associative and commutative operation!");
478
479 // Visit all operands of the expression, keeping track of their weight (the
480 // number of paths from the expression root to the operand, or if you like
481 // the number of times that operand occurs in the linearized expression).
482 // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
483 // while A has weight two.
484
485 // Worklist of non-leaf nodes (their operands are in the expression too) along
486 // with their weights, representing a certain number of paths to the operator.
487 // If an operator occurs in the worklist multiple times then we found multiple
488 // ways to get to it.
489 SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
490 Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
491 bool Changed = false;
492
493 // Leaves of the expression are values that either aren't the right kind of
494 // operation (eg: a constant, or a multiply in an add tree), or are, but have
495 // some uses that are not inside the expression. For example, in I = X + X,
496 // X = A + B, the value X has two uses (by I) that are in the expression. If
497 // X has any other uses, for example in a return instruction, then we consider
498 // X to be a leaf, and won't analyze it further. When we first visit a value,
499 // if it has more than one use then at first we conservatively consider it to
500 // be a leaf. Later, as the expression is explored, we may discover some more
501 // uses of the value from inside the expression. If all uses turn out to be
502 // from within the expression (and the value is a binary operator of the right
503 // kind) then the value is no longer considered to be a leaf, and its operands
504 // are explored.
505
506 // Leaves - Keeps track of the set of putative leaves as well as the number of
507 // paths to each leaf seen so far.
508 using LeafMap = DenseMap<Value *, APInt>;
509 LeafMap Leaves; // Leaf -> Total weight so far.
510 SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
511
512#ifndef NDEBUG
513 SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
514#endif
515 while (!Worklist.empty()) {
516 std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
517 I = P.first; // We examine the operands of this binary operator.
518
519 if (isa<OverflowingBinaryOperator>(I))
520 HasNUW &= I->hasNoUnsignedWrap();
521
522 for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
523 Value *Op = I->getOperand(OpIdx);
524 APInt Weight = P.second; // Number of paths to this operand.
525 LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
526 assert(!Op->use_empty() && "No uses, so how did we get to it?!");
527
528 // If this is a binary operation of the right kind with only one use then
529 // add its operands to the expression.
530 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
531 assert(Visited.insert(Op).second && "Not first visit!");
532 LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
533 Worklist.push_back(std::make_pair(BO, Weight));
534 continue;
535 }
536
537 // Appears to be a leaf. Is the operand already in the set of leaves?
538 LeafMap::iterator It = Leaves.find(Op);
539 if (It == Leaves.end()) {
540 // Not in the leaf map. Must be the first time we saw this operand.
541 assert(Visited.insert(Op).second && "Not first visit!");
542 if (!Op->hasOneUse()) {
543 // This value has uses not accounted for by the expression, so it is
544 // not safe to modify. Mark it as being a leaf.
546 << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
547 LeafOrder.push_back(Op);
548 Leaves[Op] = Weight;
549 continue;
550 }
551 // No uses outside the expression, try morphing it.
552 } else {
553 // Already in the leaf map.
554 assert(It != Leaves.end() && Visited.count(Op) &&
555 "In leaf map but not visited!");
556
557 // Update the number of paths to the leaf.
558 IncorporateWeight(It->second, Weight, Opcode);
559
560#if 0 // TODO: Re-enable once PR13021 is fixed.
561 // The leaf already has one use from inside the expression. As we want
562 // exactly one such use, drop this new use of the leaf.
563 assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
564 I->setOperand(OpIdx, UndefValue::get(I->getType()));
565 Changed = true;
566
567 // If the leaf is a binary operation of the right kind and we now see
568 // that its multiple original uses were in fact all by nodes belonging
569 // to the expression, then no longer consider it to be a leaf and add
570 // its operands to the expression.
571 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
572 LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
573 Worklist.push_back(std::make_pair(BO, It->second));
574 Leaves.erase(It);
575 continue;
576 }
577#endif
578
579 // If we still have uses that are not accounted for by the expression
580 // then it is not safe to modify the value.
581 if (!Op->hasOneUse())
582 continue;
583
584 // No uses outside the expression, try morphing it.
585 Weight = It->second;
586 Leaves.erase(It); // Since the value may be morphed below.
587 }
588
589 // At this point we have a value which, first of all, is not a binary
590 // expression of the right kind, and secondly, is only used inside the
591 // expression. This means that it can safely be modified. See if we
592 // can usefully morph it into an expression of the right kind.
593 assert((!isa<Instruction>(Op) ||
594 cast<Instruction>(Op)->getOpcode() != Opcode
595 || (isa<FPMathOperator>(Op) &&
596 !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
597 "Should have been handled above!");
598 assert(Op->hasOneUse() && "Has uses outside the expression tree!");
599
600 // If this is a multiply expression, turn any internal negations into
601 // multiplies by -1 so they can be reassociated. Add any users of the
602 // newly created multiplication by -1 to the redo list, so any
603 // reassociation opportunities that are exposed will be reassociated
604 // further.
605 Instruction *Neg;
606 if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
607 (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
608 match(Op, m_Instruction(Neg))) {
610 << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
612 LLVM_DEBUG(dbgs() << *Mul << '\n');
613 Worklist.push_back(std::make_pair(Mul, Weight));
614 for (User *U : Mul->users()) {
615 if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
616 ToRedo.insert(UserBO);
617 }
618 ToRedo.insert(Neg);
619 Changed = true;
620 continue;
621 }
622
623 // Failed to morph into an expression of the right type. This really is
624 // a leaf.
625 LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
626 assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
627 LeafOrder.push_back(Op);
628 Leaves[Op] = Weight;
629 }
630 }
631
632 // The leaves, repeated according to their weights, represent the linearized
633 // form of the expression.
634 for (Value *V : LeafOrder) {
635 LeafMap::iterator It = Leaves.find(V);
636 if (It == Leaves.end())
637 // Node initially thought to be a leaf wasn't.
638 continue;
639 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
640 APInt Weight = It->second;
641 if (Weight.isMinValue())
642 // Leaf already output or weight reduction eliminated it.
643 continue;
644 // Ensure the leaf is only output once.
645 It->second = 0;
646 Ops.push_back(std::make_pair(V, Weight));
647 }
648
649 // For nilpotent operations or addition there may be no operands, for example
650 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
651 // in both cases the weight reduces to 0 causing the value to be skipped.
652 if (Ops.empty()) {
653 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
654 assert(Identity && "Associative operation without identity!");
655 Ops.emplace_back(Identity, APInt(Bitwidth, 1));
656 }
657
658 return Changed;
659}
660
661/// Now that the operands for this expression tree are
662/// linearized and optimized, emit them in-order.
663void ReassociatePass::RewriteExprTree(BinaryOperator *I,
665 bool HasNUW) {
666 assert(Ops.size() > 1 && "Single values should be used directly!");
667
668 // Since our optimizations should never increase the number of operations, the
669 // new expression can usually be written reusing the existing binary operators
670 // from the original expression tree, without creating any new instructions,
671 // though the rewritten expression may have a completely different topology.
672 // We take care to not change anything if the new expression will be the same
673 // as the original. If more than trivial changes (like commuting operands)
674 // were made then we are obliged to clear out any optional subclass data like
675 // nsw flags.
676
677 /// NodesToRewrite - Nodes from the original expression available for writing
678 /// the new expression into.
679 SmallVector<BinaryOperator*, 8> NodesToRewrite;
680 unsigned Opcode = I->getOpcode();
682
683 /// NotRewritable - The operands being written will be the leaves of the new
684 /// expression and must not be used as inner nodes (via NodesToRewrite) by
685 /// mistake. Inner nodes are always reassociable, and usually leaves are not
686 /// (if they were they would have been incorporated into the expression and so
687 /// would not be leaves), so most of the time there is no danger of this. But
688 /// in rare cases a leaf may become reassociable if an optimization kills uses
689 /// of it, or it may momentarily become reassociable during rewriting (below)
690 /// due it being removed as an operand of one of its uses. Ensure that misuse
691 /// of leaf nodes as inner nodes cannot occur by remembering all of the future
692 /// leaves and refusing to reuse any of them as inner nodes.
693 SmallPtrSet<Value*, 8> NotRewritable;
694 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
695 NotRewritable.insert(Ops[i].Op);
696
697 // ExpressionChangedStart - Non-null if the rewritten expression differs from
698 // the original in some non-trivial way, requiring the clearing of optional
699 // flags. Flags are cleared from the operator in ExpressionChangedStart up to
700 // ExpressionChangedEnd inclusive.
701 BinaryOperator *ExpressionChangedStart = nullptr,
702 *ExpressionChangedEnd = nullptr;
703 for (unsigned i = 0; ; ++i) {
704 // The last operation (which comes earliest in the IR) is special as both
705 // operands will come from Ops, rather than just one with the other being
706 // a subexpression.
707 if (i+2 == Ops.size()) {
708 Value *NewLHS = Ops[i].Op;
709 Value *NewRHS = Ops[i+1].Op;
710 Value *OldLHS = Op->getOperand(0);
711 Value *OldRHS = Op->getOperand(1);
712
713 if (NewLHS == OldLHS && NewRHS == OldRHS)
714 // Nothing changed, leave it alone.
715 break;
716
717 if (NewLHS == OldRHS && NewRHS == OldLHS) {
718 // The order of the operands was reversed. Swap them.
719 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
720 Op->swapOperands();
721 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
722 MadeChange = true;
723 ++NumChanged;
724 break;
725 }
726
727 // The new operation differs non-trivially from the original. Overwrite
728 // the old operands with the new ones.
729 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
730 if (NewLHS != OldLHS) {
731 BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
732 if (BO && !NotRewritable.count(BO))
733 NodesToRewrite.push_back(BO);
734 Op->setOperand(0, NewLHS);
735 }
736 if (NewRHS != OldRHS) {
737 BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
738 if (BO && !NotRewritable.count(BO))
739 NodesToRewrite.push_back(BO);
740 Op->setOperand(1, NewRHS);
741 }
742 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
743
744 ExpressionChangedStart = Op;
745 if (!ExpressionChangedEnd)
746 ExpressionChangedEnd = Op;
747 MadeChange = true;
748 ++NumChanged;
749
750 break;
751 }
752
753 // Not the last operation. The left-hand side will be a sub-expression
754 // while the right-hand side will be the current element of Ops.
755 Value *NewRHS = Ops[i].Op;
756 if (NewRHS != Op->getOperand(1)) {
757 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
758 if (NewRHS == Op->getOperand(0)) {
759 // The new right-hand side was already present as the left operand. If
760 // we are lucky then swapping the operands will sort out both of them.
761 Op->swapOperands();
762 } else {
763 // Overwrite with the new right-hand side.
764 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
765 if (BO && !NotRewritable.count(BO))
766 NodesToRewrite.push_back(BO);
767 Op->setOperand(1, NewRHS);
768 ExpressionChangedStart = Op;
769 if (!ExpressionChangedEnd)
770 ExpressionChangedEnd = Op;
771 }
772 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
773 MadeChange = true;
774 ++NumChanged;
775 }
776
777 // Now deal with the left-hand side. If this is already an operation node
778 // from the original expression then just rewrite the rest of the expression
779 // into it.
780 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
781 if (BO && !NotRewritable.count(BO)) {
782 Op = BO;
783 continue;
784 }
785
786 // Otherwise, grab a spare node from the original expression and use that as
787 // the left-hand side. If there are no nodes left then the optimizers made
788 // an expression with more nodes than the original! This usually means that
789 // they did something stupid but it might mean that the problem was just too
790 // hard (finding the mimimal number of multiplications needed to realize a
791 // multiplication expression is NP-complete). Whatever the reason, smart or
792 // stupid, create a new node if there are none left.
793 BinaryOperator *NewOp;
794 if (NodesToRewrite.empty()) {
795 Constant *Undef = UndefValue::get(I->getType());
797 Undef, Undef, "", I);
798 if (isa<FPMathOperator>(NewOp))
799 NewOp->setFastMathFlags(I->getFastMathFlags());
800 } else {
801 NewOp = NodesToRewrite.pop_back_val();
802 }
803
804 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
805 Op->setOperand(0, NewOp);
806 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
807 ExpressionChangedStart = Op;
808 if (!ExpressionChangedEnd)
809 ExpressionChangedEnd = Op;
810 MadeChange = true;
811 ++NumChanged;
812 Op = NewOp;
813 }
814
815 // If the expression changed non-trivially then clear out all subclass data
816 // starting from the operator specified in ExpressionChanged, and compactify
817 // the operators to just before the expression root to guarantee that the
818 // expression tree is dominated by all of Ops.
819 if (ExpressionChangedStart) {
820 bool ClearFlags = true;
821 do {
822 // Preserve flags.
823 if (ClearFlags) {
824 if (isa<FPMathOperator>(I)) {
825 FastMathFlags Flags = I->getFastMathFlags();
826 ExpressionChangedStart->clearSubclassOptionalData();
827 ExpressionChangedStart->setFastMathFlags(Flags);
828 } else {
829 ExpressionChangedStart->clearSubclassOptionalData();
830 // Note that it doesn't hold for mul if one of the operands is zero.
831 // TODO: We can preserve NUW flag if we prove that all mul operands
832 // are non-zero.
833 if (HasNUW && ExpressionChangedStart->getOpcode() == Instruction::Add)
834 ExpressionChangedStart->setHasNoUnsignedWrap();
835 }
836 }
837
838 if (ExpressionChangedStart == ExpressionChangedEnd)
839 ClearFlags = false;
840 if (ExpressionChangedStart == I)
841 break;
842
843 // Discard any debug info related to the expressions that has changed (we
844 // can leave debug info related to the root and any operation that didn't
845 // change, since the result of the expression tree should be the same
846 // even after reassociation).
847 if (ClearFlags)
848 replaceDbgUsesWithUndef(ExpressionChangedStart);
849
850 ExpressionChangedStart->moveBefore(I);
851 ExpressionChangedStart =
852 cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
853 } while (true);
854 }
855
856 // Throw away any left over nodes from the original expression.
857 for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
858 RedoInsts.insert(NodesToRewrite[i]);
859}
860
861/// Insert instructions before the instruction pointed to by BI,
862/// that computes the negative version of the value specified. The negative
863/// version of the value is returned, and BI is left pointing at the instruction
864/// that should be processed next by the reassociation pass.
865/// Also add intermediate instructions to the redo list that are modified while
866/// pushing the negates through adds. These will be revisited to see if
867/// additional opportunities have been exposed.
870 if (auto *C = dyn_cast<Constant>(V)) {
871 const DataLayout &DL = BI->getModule()->getDataLayout();
872 Constant *Res = C->getType()->isFPOrFPVectorTy()
873 ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
875 if (Res)
876 return Res;
877 }
878
879 // We are trying to expose opportunity for reassociation. One of the things
880 // that we want to do to achieve this is to push a negation as deep into an
881 // expression chain as possible, to expose the add instructions. In practice,
882 // this means that we turn this:
883 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
884 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
885 // the constants. We assume that instcombine will clean up the mess later if
886 // we introduce tons of unnecessary negation instructions.
887 //
888 if (BinaryOperator *I =
889 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
890 // Push the negates through the add.
891 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
892 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
893 if (I->getOpcode() == Instruction::Add) {
894 I->setHasNoUnsignedWrap(false);
895 I->setHasNoSignedWrap(false);
896 }
897
898 // We must move the add instruction here, because the neg instructions do
899 // not dominate the old add instruction in general. By moving it, we are
900 // assured that the neg instructions we just inserted dominate the
901 // instruction we are about to insert after them.
902 //
903 I->moveBefore(BI);
904 I->setName(I->getName()+".neg");
905
906 // Add the intermediate negates to the redo list as processing them later
907 // could expose more reassociating opportunities.
908 ToRedo.insert(I);
909 return I;
910 }
911
912 // Okay, we need to materialize a negated version of V with an instruction.
913 // Scan the use lists of V to see if we have one already.
914 for (User *U : V->users()) {
915 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
916 continue;
917
918 // We found one! Now we have to make sure that the definition dominates
919 // this use. We do this by moving it to the entry block (if it is a
920 // non-instruction value) or right after the definition. These negates will
921 // be zapped by reassociate later, so we don't need much finesse here.
922 Instruction *TheNeg = dyn_cast<Instruction>(U);
923
924 // We can't safely propagate a vector zero constant with poison/undef lanes.
925 Constant *C;
926 if (match(TheNeg, m_BinOp(m_Constant(C), m_Value())) &&
927 C->containsUndefOrPoisonElement())
928 continue;
929
930 // Verify that the negate is in this function, V might be a constant expr.
931 if (!TheNeg ||
932 TheNeg->getParent()->getParent() != BI->getParent()->getParent())
933 continue;
934
935 BasicBlock::iterator InsertPt;
936 if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
937 auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
938 if (!InsertPtOpt)
939 continue;
940 InsertPt = *InsertPtOpt;
941 } else {
942 InsertPt = TheNeg->getFunction()
943 ->getEntryBlock()
945 ->getIterator();
946 }
947
948 TheNeg->moveBefore(*InsertPt->getParent(), InsertPt);
949 if (TheNeg->getOpcode() == Instruction::Sub) {
950 TheNeg->setHasNoUnsignedWrap(false);
951 TheNeg->setHasNoSignedWrap(false);
952 } else {
953 TheNeg->andIRFlags(BI);
954 }
955 ToRedo.insert(TheNeg);
956 return TheNeg;
957 }
958
959 // Insert a 'neg' instruction that subtracts the value from zero to get the
960 // negation.
961 Instruction *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
962 ToRedo.insert(NewNeg);
963 return NewNeg;
964}
965
966// See if this `or` looks like an load widening reduction, i.e. that it
967// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
968// ensure that the pattern is *really* a load widening reduction,
969// we do not ensure that it can really be replaced with a widened load,
970// only that it mostly looks like one.
974
975 auto Enqueue = [&](Value *V) {
976 auto *I = dyn_cast<Instruction>(V);
977 // Each node of an `or` reduction must be an instruction,
978 if (!I)
979 return false; // Node is certainly not part of an `or` load reduction.
980 // Only process instructions we have never processed before.
981 if (Visited.insert(I).second)
982 Worklist.emplace_back(I);
983 return true; // Will need to look at parent nodes.
984 };
985
986 if (!Enqueue(Or))
987 return false; // Not an `or` reduction pattern.
988
989 while (!Worklist.empty()) {
990 auto *I = Worklist.pop_back_val();
991
992 // Okay, which instruction is this node?
993 switch (I->getOpcode()) {
994 case Instruction::Or:
995 // Got an `or` node. That's fine, just recurse into it's operands.
996 for (Value *Op : I->operands())
997 if (!Enqueue(Op))
998 return false; // Not an `or` reduction pattern.
999 continue;
1000
1001 case Instruction::Shl:
1002 case Instruction::ZExt:
1003 // `shl`/`zext` nodes are fine, just recurse into their base operand.
1004 if (!Enqueue(I->getOperand(0)))
1005 return false; // Not an `or` reduction pattern.
1006 continue;
1007
1008 case Instruction::Load:
1009 // Perfect, `load` node means we've reached an edge of the graph.
1010 continue;
1011
1012 default: // Unknown node.
1013 return false; // Not an `or` reduction pattern.
1014 }
1015 }
1016
1017 return true;
1018}
1019
1020/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
1022 // Don't bother to convert this up unless either the LHS is an associable add
1023 // or subtract or mul or if this is only used by one of the above.
1024 // This is only a compile-time improvement, it is not needed for correctness!
1025 auto isInteresting = [](Value *V) {
1026 for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
1027 Instruction::Shl})
1028 if (isReassociableOp(V, Op))
1029 return true;
1030 return false;
1031 };
1032
1033 if (any_of(Or->operands(), isInteresting))
1034 return true;
1035
1036 Value *VB = Or->user_back();
1037 if (Or->hasOneUse() && isInteresting(VB))
1038 return true;
1039
1040 return false;
1041}
1042
1043/// If we have (X|Y), and iff X and Y have no common bits set,
1044/// transform this into (X+Y) to allow arithmetics reassociation.
1046 // Convert an or into an add.
1047 BinaryOperator *New =
1048 CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
1049 New->setHasNoSignedWrap();
1050 New->setHasNoUnsignedWrap();
1051 New->takeName(Or);
1052
1053 // Everyone now refers to the add instruction.
1054 Or->replaceAllUsesWith(New);
1055 New->setDebugLoc(Or->getDebugLoc());
1056
1057 LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
1058 return New;
1059}
1060
1061/// Return true if we should break up this subtract of X-Y into (X + -Y).
1063 // If this is a negation, we can't split it up!
1064 if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
1065 return false;
1066
1067 // Don't breakup X - undef.
1068 if (isa<UndefValue>(Sub->getOperand(1)))
1069 return false;
1070
1071 // Don't bother to break this up unless either the LHS is an associable add or
1072 // subtract or if this is only used by one.
1073 Value *V0 = Sub->getOperand(0);
1074 if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
1075 isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
1076 return true;
1077 Value *V1 = Sub->getOperand(1);
1078 if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
1079 isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
1080 return true;
1081 Value *VB = Sub->user_back();
1082 if (Sub->hasOneUse() &&
1083 (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
1084 isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
1085 return true;
1086
1087 return false;
1088}
1089
1090/// If we have (X-Y), and if either X is an add, or if this is only used by an
1091/// add, transform this into (X+(0-Y)) to promote better reassociation.
1094 // Convert a subtract into an add and a neg instruction. This allows sub
1095 // instructions to be commuted with other add instructions.
1096 //
1097 // Calculate the negative value of Operand 1 of the sub instruction,
1098 // and set it as the RHS of the add instruction we just made.
1099 Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1100 BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
1101 Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1102 Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1103 New->takeName(Sub);
1104
1105 // Everyone now refers to the add instruction.
1106 Sub->replaceAllUsesWith(New);
1107 New->setDebugLoc(Sub->getDebugLoc());
1108
1109 LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1110 return New;
1111}
1112
1113/// If this is a shift of a reassociable multiply or is used by one, change
1114/// this into a multiply by a constant to assist with further reassociation.
1116 Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1117 auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1118 MulCst = ConstantExpr::getShl(MulCst, SA);
1119
1121 BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
1122 Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
1123 Mul->takeName(Shl);
1124
1125 // Everyone now refers to the mul instruction.
1126 Shl->replaceAllUsesWith(Mul);
1127 Mul->setDebugLoc(Shl->getDebugLoc());
1128
1129 // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1130 // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1131 // handling. It can be preserved as long as we're not left shifting by
1132 // bitwidth - 1.
1133 bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1134 bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1135 unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
1136 if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1137 Mul->setHasNoSignedWrap(true);
1138 Mul->setHasNoUnsignedWrap(NUW);
1139 return Mul;
1140}
1141
1142/// Scan backwards and forwards among values with the same rank as element i
1143/// to see if X exists. If X does not exist, return i. This is useful when
1144/// scanning for 'x' when we see '-x' because they both get the same rank.
1146 unsigned i, Value *X) {
1147 unsigned XRank = Ops[i].Rank;
1148 unsigned e = Ops.size();
1149 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1150 if (Ops[j].Op == X)
1151 return j;
1152 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1153 if (Instruction *I2 = dyn_cast<Instruction>(X))
1154 if (I1->isIdenticalTo(I2))
1155 return j;
1156 }
1157 // Scan backwards.
1158 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1159 if (Ops[j].Op == X)
1160 return j;
1161 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1162 if (Instruction *I2 = dyn_cast<Instruction>(X))
1163 if (I1->isIdenticalTo(I2))
1164 return j;
1165 }
1166 return i;
1167}
1168
1169/// Emit a tree of add instructions, summing Ops together
1170/// and returning the result. Insert the tree before I.
1173 if (Ops.size() == 1) return Ops.back();
1174
1175 Value *V1 = Ops.pop_back_val();
1176 Value *V2 = EmitAddTreeOfValues(I, Ops);
1177 return CreateAdd(V2, V1, "reass.add", I, I);
1178}
1179
1180/// If V is an expression tree that is a multiplication sequence,
1181/// and if this sequence contains a multiply by Factor,
1182/// remove Factor from the tree and return the new tree.
1183Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
1184 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1185 if (!BO)
1186 return nullptr;
1187
1189 bool HasNUW = true;
1190 MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, HasNUW);
1192 Factors.reserve(Tree.size());
1193 for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
1194 RepeatedValue E = Tree[i];
1195 Factors.append(E.second.getZExtValue(),
1196 ValueEntry(getRank(E.first), E.first));
1197 }
1198
1199 bool FoundFactor = false;
1200 bool NeedsNegate = false;
1201 for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1202 if (Factors[i].Op == Factor) {
1203 FoundFactor = true;
1204 Factors.erase(Factors.begin()+i);
1205 break;
1206 }
1207
1208 // If this is a negative version of this factor, remove it.
1209 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1210 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1211 if (FC1->getValue() == -FC2->getValue()) {
1212 FoundFactor = NeedsNegate = true;
1213 Factors.erase(Factors.begin()+i);
1214 break;
1215 }
1216 } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1217 if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
1218 const APFloat &F1 = FC1->getValueAPF();
1219 APFloat F2(FC2->getValueAPF());
1220 F2.changeSign();
1221 if (F1 == F2) {
1222 FoundFactor = NeedsNegate = true;
1223 Factors.erase(Factors.begin() + i);
1224 break;
1225 }
1226 }
1227 }
1228 }
1229
1230 if (!FoundFactor) {
1231 // Make sure to restore the operands to the expression tree.
1232 RewriteExprTree(BO, Factors, HasNUW);
1233 return nullptr;
1234 }
1235
1236 BasicBlock::iterator InsertPt = ++BO->getIterator();
1237
1238 // If this was just a single multiply, remove the multiply and return the only
1239 // remaining operand.
1240 if (Factors.size() == 1) {
1241 RedoInsts.insert(BO);
1242 V = Factors[0].Op;
1243 } else {
1244 RewriteExprTree(BO, Factors, HasNUW);
1245 V = BO;
1246 }
1247
1248 if (NeedsNegate)
1249 V = CreateNeg(V, "neg", &*InsertPt, BO);
1250
1251 return V;
1252}
1253
1254/// If V is a single-use multiply, recursively add its operands as factors,
1255/// otherwise add V to the list of factors.
1256///
1257/// Ops is the top-level list of add operands we're trying to factor.
1259 SmallVectorImpl<Value*> &Factors) {
1260 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1261 if (!BO) {
1262 Factors.push_back(V);
1263 return;
1264 }
1265
1266 // Otherwise, add the LHS and RHS to the list of factors.
1269}
1270
1271/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1272/// This optimizes based on identities. If it can be reduced to a single Value,
1273/// it is returned, otherwise the Ops list is mutated as necessary.
1274static Value *OptimizeAndOrXor(unsigned Opcode,
1276 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1277 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1278 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1279 // First, check for X and ~X in the operand list.
1280 assert(i < Ops.size());
1281 Value *X;
1282 if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1283 unsigned FoundX = FindInOperandList(Ops, i, X);
1284 if (FoundX != i) {
1285 if (Opcode == Instruction::And) // ...&X&~X = 0
1286 return Constant::getNullValue(X->getType());
1287
1288 if (Opcode == Instruction::Or) // ...|X|~X = -1
1289 return Constant::getAllOnesValue(X->getType());
1290 }
1291 }
1292
1293 // Next, check for duplicate pairs of values, which we assume are next to
1294 // each other, due to our sorting criteria.
1295 assert(i < Ops.size());
1296 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1297 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1298 // Drop duplicate values for And and Or.
1299 Ops.erase(Ops.begin()+i);
1300 --i; --e;
1301 ++NumAnnihil;
1302 continue;
1303 }
1304
1305 // Drop pairs of values for Xor.
1306 assert(Opcode == Instruction::Xor);
1307 if (e == 2)
1308 return Constant::getNullValue(Ops[0].Op->getType());
1309
1310 // Y ^ X^X -> Y
1311 Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1312 i -= 1; e -= 2;
1313 ++NumAnnihil;
1314 }
1315 }
1316 return nullptr;
1317}
1318
1319/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1320/// instruction with the given two operands, and return the resulting
1321/// instruction. There are two special cases: 1) if the constant operand is 0,
1322/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1323/// be returned.
1324static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
1325 const APInt &ConstOpnd) {
1326 if (ConstOpnd.isZero())
1327 return nullptr;
1328
1329 if (ConstOpnd.isAllOnes())
1330 return Opnd;
1331
1332 Instruction *I = BinaryOperator::CreateAnd(
1333 Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1334 InsertBefore);
1335 I->setDebugLoc(InsertBefore->getDebugLoc());
1336 return I;
1337}
1338
1339// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1340// into "R ^ C", where C would be 0, and R is a symbolic value.
1341//
1342// If it was successful, true is returned, and the "R" and "C" is returned
1343// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1344// and both "Res" and "ConstOpnd" remain unchanged.
1345bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1346 APInt &ConstOpnd, Value *&Res) {
1347 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1348 // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1349 // = (x & ~c1) ^ (c1 ^ c2)
1350 // It is useful only when c1 == c2.
1351 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1352 return false;
1353
1354 if (!Opnd1->getValue()->hasOneUse())
1355 return false;
1356
1357 const APInt &C1 = Opnd1->getConstPart();
1358 if (C1 != ConstOpnd)
1359 return false;
1360
1361 Value *X = Opnd1->getSymbolicPart();
1362 Res = createAndInstr(I, X, ~C1);
1363 // ConstOpnd was C2, now C1 ^ C2.
1364 ConstOpnd ^= C1;
1365
1366 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1367 RedoInsts.insert(T);
1368 return true;
1369}
1370
1371// Helper function of OptimizeXor(). It tries to simplify
1372// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1373// symbolic value.
1374//
1375// If it was successful, true is returned, and the "R" and "C" is returned
1376// via "Res" and "ConstOpnd", respectively (If the entire expression is
1377// evaluated to a constant, the Res is set to NULL); otherwise, false is
1378// returned, and both "Res" and "ConstOpnd" remain unchanged.
1379bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1380 XorOpnd *Opnd2, APInt &ConstOpnd,
1381 Value *&Res) {
1382 Value *X = Opnd1->getSymbolicPart();
1383 if (X != Opnd2->getSymbolicPart())
1384 return false;
1385
1386 // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1387 int DeadInstNum = 1;
1388 if (Opnd1->getValue()->hasOneUse())
1389 DeadInstNum++;
1390 if (Opnd2->getValue()->hasOneUse())
1391 DeadInstNum++;
1392
1393 // Xor-Rule 2:
1394 // (x | c1) ^ (x & c2)
1395 // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1396 // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1397 // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1398 //
1399 if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1400 if (Opnd2->isOrExpr())
1401 std::swap(Opnd1, Opnd2);
1402
1403 const APInt &C1 = Opnd1->getConstPart();
1404 const APInt &C2 = Opnd2->getConstPart();
1405 APInt C3((~C1) ^ C2);
1406
1407 // Do not increase code size!
1408 if (!C3.isZero() && !C3.isAllOnes()) {
1409 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1410 if (NewInstNum > DeadInstNum)
1411 return false;
1412 }
1413
1414 Res = createAndInstr(I, X, C3);
1415 ConstOpnd ^= C1;
1416 } else if (Opnd1->isOrExpr()) {
1417 // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1418 //
1419 const APInt &C1 = Opnd1->getConstPart();
1420 const APInt &C2 = Opnd2->getConstPart();
1421 APInt C3 = C1 ^ C2;
1422
1423 // Do not increase code size
1424 if (!C3.isZero() && !C3.isAllOnes()) {
1425 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1426 if (NewInstNum > DeadInstNum)
1427 return false;
1428 }
1429
1430 Res = createAndInstr(I, X, C3);
1431 ConstOpnd ^= C3;
1432 } else {
1433 // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1434 //
1435 const APInt &C1 = Opnd1->getConstPart();
1436 const APInt &C2 = Opnd2->getConstPart();
1437 APInt C3 = C1 ^ C2;
1438 Res = createAndInstr(I, X, C3);
1439 }
1440
1441 // Put the original operands in the Redo list; hope they will be deleted
1442 // as dead code.
1443 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1444 RedoInsts.insert(T);
1445 if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1446 RedoInsts.insert(T);
1447
1448 return true;
1449}
1450
1451/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1452/// to a single Value, it is returned, otherwise the Ops list is mutated as
1453/// necessary.
1454Value *ReassociatePass::OptimizeXor(Instruction *I,
1456 if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1457 return V;
1458
1459 if (Ops.size() == 1)
1460 return nullptr;
1461
1463 SmallVector<XorOpnd*, 8> OpndPtrs;
1464 Type *Ty = Ops[0].Op->getType();
1465 APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1466
1467 // Step 1: Convert ValueEntry to XorOpnd
1468 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1469 Value *V = Ops[i].Op;
1470 const APInt *C;
1471 // TODO: Support non-splat vectors.
1472 if (match(V, m_APInt(C))) {
1473 ConstOpnd ^= *C;
1474 } else {
1475 XorOpnd O(V);
1476 O.setSymbolicRank(getRank(O.getSymbolicPart()));
1477 Opnds.push_back(O);
1478 }
1479 }
1480
1481 // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1482 // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1483 // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1484 // with the previous loop --- the iterator of the "Opnds" may be invalidated
1485 // when new elements are added to the vector.
1486 for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
1487 OpndPtrs.push_back(&Opnds[i]);
1488
1489 // Step 2: Sort the Xor-Operands in a way such that the operands containing
1490 // the same symbolic value cluster together. For instance, the input operand
1491 // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1492 // ("x | 123", "x & 789", "y & 456").
1493 //
1494 // The purpose is twofold:
1495 // 1) Cluster together the operands sharing the same symbolic-value.
1496 // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1497 // could potentially shorten crital path, and expose more loop-invariants.
1498 // Note that values' rank are basically defined in RPO order (FIXME).
1499 // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1500 // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1501 // "z" in the order of X-Y-Z is better than any other orders.
1502 llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1503 return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1504 });
1505
1506 // Step 3: Combine adjacent operands
1507 XorOpnd *PrevOpnd = nullptr;
1508 bool Changed = false;
1509 for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1510 XorOpnd *CurrOpnd = OpndPtrs[i];
1511 // The combined value
1512 Value *CV;
1513
1514 // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1515 if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
1516 Changed = true;
1517 if (CV)
1518 *CurrOpnd = XorOpnd(CV);
1519 else {
1520 CurrOpnd->Invalidate();
1521 continue;
1522 }
1523 }
1524
1525 if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1526 PrevOpnd = CurrOpnd;
1527 continue;
1528 }
1529
1530 // step 3.2: When previous and current operands share the same symbolic
1531 // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1532 if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1533 // Remove previous operand
1534 PrevOpnd->Invalidate();
1535 if (CV) {
1536 *CurrOpnd = XorOpnd(CV);
1537 PrevOpnd = CurrOpnd;
1538 } else {
1539 CurrOpnd->Invalidate();
1540 PrevOpnd = nullptr;
1541 }
1542 Changed = true;
1543 }
1544 }
1545
1546 // Step 4: Reassemble the Ops
1547 if (Changed) {
1548 Ops.clear();
1549 for (const XorOpnd &O : Opnds) {
1550 if (O.isInvalid())
1551 continue;
1552 ValueEntry VE(getRank(O.getValue()), O.getValue());
1553 Ops.push_back(VE);
1554 }
1555 if (!ConstOpnd.isZero()) {
1556 Value *C = ConstantInt::get(Ty, ConstOpnd);
1557 ValueEntry VE(getRank(C), C);
1558 Ops.push_back(VE);
1559 }
1560 unsigned Sz = Ops.size();
1561 if (Sz == 1)
1562 return Ops.back().Op;
1563 if (Sz == 0) {
1564 assert(ConstOpnd.isZero());
1565 return ConstantInt::get(Ty, ConstOpnd);
1566 }
1567 }
1568
1569 return nullptr;
1570}
1571
1572/// Optimize a series of operands to an 'add' instruction. This
1573/// optimizes based on identities. If it can be reduced to a single Value, it
1574/// is returned, otherwise the Ops list is mutated as necessary.
1575Value *ReassociatePass::OptimizeAdd(Instruction *I,
1577 // Scan the operand lists looking for X and -X pairs. If we find any, we
1578 // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1579 // scan for any
1580 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1581
1582 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1583 Value *TheOp = Ops[i].Op;
1584 // Check to see if we've seen this operand before. If so, we factor all
1585 // instances of the operand together. Due to our sorting criteria, we know
1586 // that these need to be next to each other in the vector.
1587 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1588 // Rescan the list, remove all instances of this operand from the expr.
1589 unsigned NumFound = 0;
1590 do {
1591 Ops.erase(Ops.begin()+i);
1592 ++NumFound;
1593 } while (i != Ops.size() && Ops[i].Op == TheOp);
1594
1595 LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1596 << '\n');
1597 ++NumFactor;
1598
1599 // Insert a new multiply.
1600 Type *Ty = TheOp->getType();
1601 Constant *C = Ty->isIntOrIntVectorTy() ?
1602 ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1603 Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
1604
1605 // Now that we have inserted a multiply, optimize it. This allows us to
1606 // handle cases that require multiple factoring steps, such as this:
1607 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1608 RedoInsts.insert(Mul);
1609
1610 // If every add operand was a duplicate, return the multiply.
1611 if (Ops.empty())
1612 return Mul;
1613
1614 // Otherwise, we had some input that didn't have the dupe, such as
1615 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1616 // things being added by this operation.
1617 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1618
1619 --i;
1620 e = Ops.size();
1621 continue;
1622 }
1623
1624 // Check for X and -X or X and ~X in the operand list.
1625 Value *X;
1626 if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1627 !match(TheOp, m_FNeg(m_Value(X))))
1628 continue;
1629
1630 unsigned FoundX = FindInOperandList(Ops, i, X);
1631 if (FoundX == i)
1632 continue;
1633
1634 // Remove X and -X from the operand list.
1635 if (Ops.size() == 2 &&
1636 (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1637 return Constant::getNullValue(X->getType());
1638
1639 // Remove X and ~X from the operand list.
1640 if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1641 return Constant::getAllOnesValue(X->getType());
1642
1643 Ops.erase(Ops.begin()+i);
1644 if (i < FoundX)
1645 --FoundX;
1646 else
1647 --i; // Need to back up an extra one.
1648 Ops.erase(Ops.begin()+FoundX);
1649 ++NumAnnihil;
1650 --i; // Revisit element.
1651 e -= 2; // Removed two elements.
1652
1653 // if X and ~X we append -1 to the operand list.
1654 if (match(TheOp, m_Not(m_Value()))) {
1655 Value *V = Constant::getAllOnesValue(X->getType());
1656 Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1657 e += 1;
1658 }
1659 }
1660
1661 // Scan the operand list, checking to see if there are any common factors
1662 // between operands. Consider something like A*A+A*B*C+D. We would like to
1663 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1664 // To efficiently find this, we count the number of times a factor occurs
1665 // for any ADD operands that are MULs.
1666 DenseMap<Value*, unsigned> FactorOccurrences;
1667
1668 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1669 // where they are actually the same multiply.
1670 unsigned MaxOcc = 0;
1671 Value *MaxOccVal = nullptr;
1672 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1673 BinaryOperator *BOp =
1674 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1675 if (!BOp)
1676 continue;
1677
1678 // Compute all of the factors of this added value.
1679 SmallVector<Value*, 8> Factors;
1680 FindSingleUseMultiplyFactors(BOp, Factors);
1681 assert(Factors.size() > 1 && "Bad linearize!");
1682
1683 // Add one to FactorOccurrences for each unique factor in this op.
1684 SmallPtrSet<Value*, 8> Duplicates;
1685 for (Value *Factor : Factors) {
1686 if (!Duplicates.insert(Factor).second)
1687 continue;
1688
1689 unsigned Occ = ++FactorOccurrences[Factor];
1690 if (Occ > MaxOcc) {
1691 MaxOcc = Occ;
1692 MaxOccVal = Factor;
1693 }
1694
1695 // If Factor is a negative constant, add the negated value as a factor
1696 // because we can percolate the negate out. Watch for minint, which
1697 // cannot be positivified.
1698 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1699 if (CI->isNegative() && !CI->isMinValue(true)) {
1700 Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1701 if (!Duplicates.insert(Factor).second)
1702 continue;
1703 unsigned Occ = ++FactorOccurrences[Factor];
1704 if (Occ > MaxOcc) {
1705 MaxOcc = Occ;
1706 MaxOccVal = Factor;
1707 }
1708 }
1709 } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1710 if (CF->isNegative()) {
1711 APFloat F(CF->getValueAPF());
1712 F.changeSign();
1713 Factor = ConstantFP::get(CF->getContext(), F);
1714 if (!Duplicates.insert(Factor).second)
1715 continue;
1716 unsigned Occ = ++FactorOccurrences[Factor];
1717 if (Occ > MaxOcc) {
1718 MaxOcc = Occ;
1719 MaxOccVal = Factor;
1720 }
1721 }
1722 }
1723 }
1724 }
1725
1726 // If any factor occurred more than one time, we can pull it out.
1727 if (MaxOcc > 1) {
1728 LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1729 << '\n');
1730 ++NumFactor;
1731
1732 // Create a new instruction that uses the MaxOccVal twice. If we don't do
1733 // this, we could otherwise run into situations where removing a factor
1734 // from an expression will drop a use of maxocc, and this can cause
1735 // RemoveFactorFromExpression on successive values to behave differently.
1736 Instruction *DummyInst =
1737 I->getType()->isIntOrIntVectorTy()
1738 ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1739 : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1740
1742 for (unsigned i = 0; i != Ops.size(); ++i) {
1743 // Only try to remove factors from expressions we're allowed to.
1744 BinaryOperator *BOp =
1745 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1746 if (!BOp)
1747 continue;
1748
1749 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1750 // The factorized operand may occur several times. Convert them all in
1751 // one fell swoop.
1752 for (unsigned j = Ops.size(); j != i;) {
1753 --j;
1754 if (Ops[j].Op == Ops[i].Op) {
1755 NewMulOps.push_back(V);
1756 Ops.erase(Ops.begin()+j);
1757 }
1758 }
1759 --i;
1760 }
1761 }
1762
1763 // No need for extra uses anymore.
1764 DummyInst->deleteValue();
1765
1766 unsigned NumAddedValues = NewMulOps.size();
1767 Value *V = EmitAddTreeOfValues(I, NewMulOps);
1768
1769 // Now that we have inserted the add tree, optimize it. This allows us to
1770 // handle cases that require multiple factoring steps, such as this:
1771 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1772 assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1773 (void)NumAddedValues;
1774 if (Instruction *VI = dyn_cast<Instruction>(V))
1775 RedoInsts.insert(VI);
1776
1777 // Create the multiply.
1778 Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I, I);
1779
1780 // Rerun associate on the multiply in case the inner expression turned into
1781 // a multiply. We want to make sure that we keep things in canonical form.
1782 RedoInsts.insert(V2);
1783
1784 // If every add operand included the factor (e.g. "A*B + A*C"), then the
1785 // entire result expression is just the multiply "A*(B+C)".
1786 if (Ops.empty())
1787 return V2;
1788
1789 // Otherwise, we had some input that didn't have the factor, such as
1790 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1791 // things being added by this operation.
1792 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1793 }
1794
1795 return nullptr;
1796}
1797
1798/// Build up a vector of value/power pairs factoring a product.
1799///
1800/// Given a series of multiplication operands, build a vector of factors and
1801/// the powers each is raised to when forming the final product. Sort them in
1802/// the order of descending power.
1803///
1804/// (x*x) -> [(x, 2)]
1805/// ((x*x)*x) -> [(x, 3)]
1806/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1807///
1808/// \returns Whether any factors have a power greater than one.
1810 SmallVectorImpl<Factor> &Factors) {
1811 // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1812 // Compute the sum of powers of simplifiable factors.
1813 unsigned FactorPowerSum = 0;
1814 for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1815 Value *Op = Ops[Idx-1].Op;
1816
1817 // Count the number of occurrences of this value.
1818 unsigned Count = 1;
1819 for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1820 ++Count;
1821 // Track for simplification all factors which occur 2 or more times.
1822 if (Count > 1)
1823 FactorPowerSum += Count;
1824 }
1825
1826 // We can only simplify factors if the sum of the powers of our simplifiable
1827 // factors is 4 or higher. When that is the case, we will *always* have
1828 // a simplification. This is an important invariant to prevent cyclicly
1829 // trying to simplify already minimal formations.
1830 if (FactorPowerSum < 4)
1831 return false;
1832
1833 // Now gather the simplifiable factors, removing them from Ops.
1834 FactorPowerSum = 0;
1835 for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1836 Value *Op = Ops[Idx-1].Op;
1837
1838 // Count the number of occurrences of this value.
1839 unsigned Count = 1;
1840 for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1841 ++Count;
1842 if (Count == 1)
1843 continue;
1844 // Move an even number of occurrences to Factors.
1845 Count &= ~1U;
1846 Idx -= Count;
1847 FactorPowerSum += Count;
1848 Factors.push_back(Factor(Op, Count));
1849 Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1850 }
1851
1852 // None of the adjustments above should have reduced the sum of factor powers
1853 // below our mininum of '4'.
1854 assert(FactorPowerSum >= 4);
1855
1856 llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
1857 return LHS.Power > RHS.Power;
1858 });
1859 return true;
1860}
1861
1862/// Build a tree of multiplies, computing the product of Ops.
1865 if (Ops.size() == 1)
1866 return Ops.back();
1867
1868 Value *LHS = Ops.pop_back_val();
1869 do {
1870 if (LHS->getType()->isIntOrIntVectorTy())
1871 LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1872 else
1873 LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1874 } while (!Ops.empty());
1875
1876 return LHS;
1877}
1878
1879/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1880///
1881/// Given a vector of values raised to various powers, where no two values are
1882/// equal and the powers are sorted in decreasing order, compute the minimal
1883/// DAG of multiplies to compute the final product, and return that product
1884/// value.
1885Value *
1886ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1887 SmallVectorImpl<Factor> &Factors) {
1888 assert(Factors[0].Power);
1889 SmallVector<Value *, 4> OuterProduct;
1890 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1891 Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1892 if (Factors[Idx].Power != Factors[LastIdx].Power) {
1893 LastIdx = Idx;
1894 continue;
1895 }
1896
1897 // We want to multiply across all the factors with the same power so that
1898 // we can raise them to that power as a single entity. Build a mini tree
1899 // for that.
1900 SmallVector<Value *, 4> InnerProduct;
1901 InnerProduct.push_back(Factors[LastIdx].Base);
1902 do {
1903 InnerProduct.push_back(Factors[Idx].Base);
1904 ++Idx;
1905 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1906
1907 // Reset the base value of the first factor to the new expression tree.
1908 // We'll remove all the factors with the same power in a second pass.
1909 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1910 if (Instruction *MI = dyn_cast<Instruction>(M))
1911 RedoInsts.insert(MI);
1912
1913 LastIdx = Idx;
1914 }
1915 // Unique factors with equal powers -- we've folded them into the first one's
1916 // base.
1917 Factors.erase(std::unique(Factors.begin(), Factors.end(),
1918 [](const Factor &LHS, const Factor &RHS) {
1919 return LHS.Power == RHS.Power;
1920 }),
1921 Factors.end());
1922
1923 // Iteratively collect the base of each factor with an add power into the
1924 // outer product, and halve each power in preparation for squaring the
1925 // expression.
1926 for (Factor &F : Factors) {
1927 if (F.Power & 1)
1928 OuterProduct.push_back(F.Base);
1929 F.Power >>= 1;
1930 }
1931 if (Factors[0].Power) {
1932 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1933 OuterProduct.push_back(SquareRoot);
1934 OuterProduct.push_back(SquareRoot);
1935 }
1936 if (OuterProduct.size() == 1)
1937 return OuterProduct.front();
1938
1939 Value *V = buildMultiplyTree(Builder, OuterProduct);
1940 return V;
1941}
1942
1943Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
1945 // We can only optimize the multiplies when there is a chain of more than
1946 // three, such that a balanced tree might require fewer total multiplies.
1947 if (Ops.size() < 4)
1948 return nullptr;
1949
1950 // Try to turn linear trees of multiplies without other uses of the
1951 // intermediate stages into minimal multiply DAGs with perfect sub-expression
1952 // re-use.
1953 SmallVector<Factor, 4> Factors;
1954 if (!collectMultiplyFactors(Ops, Factors))
1955 return nullptr; // All distinct factors, so nothing left for us to do.
1956
1957 IRBuilder<> Builder(I);
1958 // The reassociate transformation for FP operations is performed only
1959 // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1960 // to the newly generated operations.
1961 if (auto FPI = dyn_cast<FPMathOperator>(I))
1962 Builder.setFastMathFlags(FPI->getFastMathFlags());
1963
1964 Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1965 if (Ops.empty())
1966 return V;
1967
1968 ValueEntry NewEntry = ValueEntry(getRank(V), V);
1969 Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
1970 return nullptr;
1971}
1972
1973Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
1975 // Now that we have the linearized expression tree, try to optimize it.
1976 // Start by folding any constants that we found.
1977 const DataLayout &DL = I->getModule()->getDataLayout();
1978 Constant *Cst = nullptr;
1979 unsigned Opcode = I->getOpcode();
1980 while (!Ops.empty()) {
1981 if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
1982 if (!Cst) {
1983 Ops.pop_back();
1984 Cst = C;
1985 continue;
1986 }
1987 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
1988 Ops.pop_back();
1989 Cst = Res;
1990 continue;
1991 }
1992 }
1993 break;
1994 }
1995 // If there was nothing but constants then we are done.
1996 if (Ops.empty())
1997 return Cst;
1998
1999 // Put the combined constant back at the end of the operand list, except if
2000 // there is no point. For example, an add of 0 gets dropped here, while a
2001 // multiplication by zero turns the whole expression into zero.
2002 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
2003 if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
2004 return Cst;
2005 Ops.push_back(ValueEntry(0, Cst));
2006 }
2007
2008 if (Ops.size() == 1) return Ops[0].Op;
2009
2010 // Handle destructive annihilation due to identities between elements in the
2011 // argument list here.
2012 unsigned NumOps = Ops.size();
2013 switch (Opcode) {
2014 default: break;
2015 case Instruction::And:
2016 case Instruction::Or:
2017 if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
2018 return Result;
2019 break;
2020
2021 case Instruction::Xor:
2022 if (Value *Result = OptimizeXor(I, Ops))
2023 return Result;
2024 break;
2025
2026 case Instruction::Add:
2027 case Instruction::FAdd:
2028 if (Value *Result = OptimizeAdd(I, Ops))
2029 return Result;
2030 break;
2031
2032 case Instruction::Mul:
2033 case Instruction::FMul:
2034 if (Value *Result = OptimizeMul(I, Ops))
2035 return Result;
2036 break;
2037 }
2038
2039 if (Ops.size() != NumOps)
2040 return OptimizeExpression(I, Ops);
2041 return nullptr;
2042}
2043
2044// Remove dead instructions and if any operands are trivially dead add them to
2045// Insts so they will be removed as well.
2046void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
2047 OrderedSet &Insts) {
2048 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2049 SmallVector<Value *, 4> Ops(I->operands());
2050 ValueRankMap.erase(I);
2051 Insts.remove(I);
2052 RedoInsts.remove(I);
2054 I->eraseFromParent();
2055 for (auto *Op : Ops)
2056 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
2057 if (OpInst->use_empty())
2058 Insts.insert(OpInst);
2059}
2060
2061/// Zap the given instruction, adding interesting operands to the work list.
2062void ReassociatePass::EraseInst(Instruction *I) {
2063 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2064 LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
2065
2066 SmallVector<Value *, 8> Ops(I->operands());
2067 // Erase the dead instruction.
2068 ValueRankMap.erase(I);
2069 RedoInsts.remove(I);
2071 I->eraseFromParent();
2072 // Optimize its operands.
2073 SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
2074 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2075 if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
2076 // If this is a node in an expression tree, climb to the expression root
2077 // and add that since that's where optimization actually happens.
2078 unsigned Opcode = Op->getOpcode();
2079 while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
2080 Visited.insert(Op).second)
2081 Op = Op->user_back();
2082
2083 // The instruction we're going to push may be coming from a
2084 // dead block, and Reassociate skips the processing of unreachable
2085 // blocks because it's a waste of time and also because it can
2086 // lead to infinite loop due to LLVM's non-standard definition
2087 // of dominance.
2088 if (ValueRankMap.contains(Op))
2089 RedoInsts.insert(Op);
2090 }
2091
2092 MadeChange = true;
2093}
2094
2095/// Recursively analyze an expression to build a list of instructions that have
2096/// negative floating-point constant operands. The caller can then transform
2097/// the list to create positive constants for better reassociation and CSE.
2099 SmallVectorImpl<Instruction *> &Candidates) {
2100 // Handle only one-use instructions. Combining negations does not justify
2101 // replicating instructions.
2102 Instruction *I;
2103 if (!match(V, m_OneUse(m_Instruction(I))))
2104 return;
2105
2106 // Handle expressions of multiplications and divisions.
2107 // TODO: This could look through floating-point casts.
2108 const APFloat *C;
2109 switch (I->getOpcode()) {
2110 case Instruction::FMul:
2111 // Not expecting non-canonical code here. Bail out and wait.
2112 if (match(I->getOperand(0), m_Constant()))
2113 break;
2114
2115 if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2116 Candidates.push_back(I);
2117 LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2118 }
2119 getNegatibleInsts(I->getOperand(0), Candidates);
2120 getNegatibleInsts(I->getOperand(1), Candidates);
2121 break;
2122 case Instruction::FDiv:
2123 // Not expecting non-canonical code here. Bail out and wait.
2124 if (match(I->getOperand(0), m_Constant()) &&
2125 match(I->getOperand(1), m_Constant()))
2126 break;
2127
2128 if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2129 (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2130 Candidates.push_back(I);
2131 LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2132 }
2133 getNegatibleInsts(I->getOperand(0), Candidates);
2134 getNegatibleInsts(I->getOperand(1), Candidates);
2135 break;
2136 default:
2137 break;
2138 }
2139}
2140
2141/// Given an fadd/fsub with an operand that is a one-use instruction
2142/// (the fadd/fsub), try to change negative floating-point constants into
2143/// positive constants to increase potential for reassociation and CSE.
2144Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
2145 Instruction *Op,
2146 Value *OtherOp) {
2147 assert((I->getOpcode() == Instruction::FAdd ||
2148 I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2149
2150 // Collect instructions with negative FP constants from the subtree that ends
2151 // in Op.
2153 getNegatibleInsts(Op, Candidates);
2154 if (Candidates.empty())
2155 return nullptr;
2156
2157 // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
2158 // resulting subtract will be broken up later. This can get us into an
2159 // infinite loop during reassociation.
2160 bool IsFSub = I->getOpcode() == Instruction::FSub;
2161 bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
2162 if (NeedsSubtract && ShouldBreakUpSubtract(I))
2163 return nullptr;
2164
2165 for (Instruction *Negatible : Candidates) {
2166 const APFloat *C;
2167 if (match(Negatible->getOperand(0), m_APFloat(C))) {
2168 assert(!match(Negatible->getOperand(1), m_Constant()) &&
2169 "Expecting only 1 constant operand");
2170 assert(C->isNegative() && "Expected negative FP constant");
2171 Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
2172 MadeChange = true;
2173 }
2174 if (match(Negatible->getOperand(1), m_APFloat(C))) {
2175 assert(!match(Negatible->getOperand(0), m_Constant()) &&
2176 "Expecting only 1 constant operand");
2177 assert(C->isNegative() && "Expected negative FP constant");
2178 Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
2179 MadeChange = true;
2180 }
2181 }
2182 assert(MadeChange == true && "Negative constant candidate was not changed");
2183
2184 // Negations cancelled out.
2185 if (Candidates.size() % 2 == 0)
2186 return I;
2187
2188 // Negate the final operand in the expression by flipping the opcode of this
2189 // fadd/fsub.
2190 assert(Candidates.size() % 2 == 1 && "Expected odd number");
2191 IRBuilder<> Builder(I);
2192 Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
2193 : Builder.CreateFSubFMF(OtherOp, Op, I);
2194 I->replaceAllUsesWith(NewInst);
2195 RedoInsts.insert(I);
2196 return dyn_cast<Instruction>(NewInst);
2197}
2198
2199/// Canonicalize expressions that contain a negative floating-point constant
2200/// of the following form:
2201/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2202/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2203/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2204///
2205/// The fadd/fsub opcode may be switched to allow folding a negation into the
2206/// input instruction.
2207Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
2208 LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
2209 Value *X;
2210 Instruction *Op;
2212 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2213 I = R;
2215 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2216 I = R;
2218 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2219 I = R;
2220 return I;
2221}
2222
2223/// Inspect and optimize the given instruction. Note that erasing
2224/// instructions is not allowed.
2225void ReassociatePass::OptimizeInst(Instruction *I) {
2226 // Only consider operations that we understand.
2227 if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
2228 return;
2229
2230 if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
2231 // If an operand of this shift is a reassociable multiply, or if the shift
2232 // is used by a reassociable multiply or add, turn into a multiply.
2233 if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
2234 (I->hasOneUse() &&
2235 (isReassociableOp(I->user_back(), Instruction::Mul) ||
2236 isReassociableOp(I->user_back(), Instruction::Add)))) {
2238 RedoInsts.insert(I);
2239 MadeChange = true;
2240 I = NI;
2241 }
2242
2243 // Commute binary operators, to canonicalize the order of their operands.
2244 // This can potentially expose more CSE opportunities, and makes writing other
2245 // transformations simpler.
2246 if (I->isCommutative())
2247 canonicalizeOperands(I);
2248
2249 // Canonicalize negative constants out of expressions.
2250 if (Instruction *Res = canonicalizeNegFPConstants(I))
2251 I = Res;
2252
2253 // Don't optimize floating-point instructions unless they have the
2254 // appropriate FastMathFlags for reassociation enabled.
2255 if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
2256 return;
2257
2258 // Do not reassociate boolean (i1) expressions. We want to preserve the
2259 // original order of evaluation for short-circuited comparisons that
2260 // SimplifyCFG has folded to AND/OR expressions. If the expression
2261 // is not further optimized, it is likely to be transformed back to a
2262 // short-circuited form for code gen, and the source order may have been
2263 // optimized for the most likely conditions.
2264 if (I->getType()->isIntegerTy(1))
2265 return;
2266
2267 // If this is a bitwise or instruction of operands
2268 // with no common bits set, convert it to X+Y.
2269 if (I->getOpcode() == Instruction::Or &&
2271 (cast<PossiblyDisjointInst>(I)->isDisjoint() ||
2272 haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
2273 SimplifyQuery(I->getModule()->getDataLayout(),
2274 /*DT=*/nullptr, /*AC=*/nullptr, I)))) {
2276 RedoInsts.insert(I);
2277 MadeChange = true;
2278 I = NI;
2279 }
2280
2281 // If this is a subtract instruction which is not already in negate form,
2282 // see if we can convert it to X+-Y.
2283 if (I->getOpcode() == Instruction::Sub) {
2284 if (ShouldBreakUpSubtract(I)) {
2285 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2286 RedoInsts.insert(I);
2287 MadeChange = true;
2288 I = NI;
2289 } else if (match(I, m_Neg(m_Value()))) {
2290 // Otherwise, this is a negation. See if the operand is a multiply tree
2291 // and if this is not an inner node of a multiply tree.
2292 if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2293 (!I->hasOneUse() ||
2294 !isReassociableOp(I->user_back(), Instruction::Mul))) {
2296 // If the negate was simplified, revisit the users to see if we can
2297 // reassociate further.
2298 for (User *U : NI->users()) {
2299 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2300 RedoInsts.insert(Tmp);
2301 }
2302 RedoInsts.insert(I);
2303 MadeChange = true;
2304 I = NI;
2305 }
2306 }
2307 } else if (I->getOpcode() == Instruction::FNeg ||
2308 I->getOpcode() == Instruction::FSub) {
2309 if (ShouldBreakUpSubtract(I)) {
2310 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2311 RedoInsts.insert(I);
2312 MadeChange = true;
2313 I = NI;
2314 } else if (match(I, m_FNeg(m_Value()))) {
2315 // Otherwise, this is a negation. See if the operand is a multiply tree
2316 // and if this is not an inner node of a multiply tree.
2317 Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
2318 I->getOperand(0);
2319 if (isReassociableOp(Op, Instruction::FMul) &&
2320 (!I->hasOneUse() ||
2321 !isReassociableOp(I->user_back(), Instruction::FMul))) {
2322 // If the negate was simplified, revisit the users to see if we can
2323 // reassociate further.
2325 for (User *U : NI->users()) {
2326 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2327 RedoInsts.insert(Tmp);
2328 }
2329 RedoInsts.insert(I);
2330 MadeChange = true;
2331 I = NI;
2332 }
2333 }
2334 }
2335
2336 // If this instruction is an associative binary operator, process it.
2337 if (!I->isAssociative()) return;
2338 BinaryOperator *BO = cast<BinaryOperator>(I);
2339
2340 // If this is an interior node of a reassociable tree, ignore it until we
2341 // get to the root of the tree, to avoid N^2 analysis.
2342 unsigned Opcode = BO->getOpcode();
2343 if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2344 // During the initial run we will get to the root of the tree.
2345 // But if we get here while we are redoing instructions, there is no
2346 // guarantee that the root will be visited. So Redo later
2347 if (BO->user_back() != BO &&
2348 BO->getParent() == BO->user_back()->getParent())
2349 RedoInsts.insert(BO->user_back());
2350 return;
2351 }
2352
2353 // If this is an add tree that is used by a sub instruction, ignore it
2354 // until we process the subtract.
2355 if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2356 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2357 return;
2358 if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2359 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2360 return;
2361
2362 ReassociateExpression(BO);
2363}
2364
2365void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2366 // First, walk the expression tree, linearizing the tree, collecting the
2367 // operand information.
2369 bool HasNUW = true;
2370 MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, HasNUW);
2372 Ops.reserve(Tree.size());
2373 for (const RepeatedValue &E : Tree)
2374 Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first));
2375
2376 LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2377
2378 // Now that we have linearized the tree to a list and have gathered all of
2379 // the operands and their ranks, sort the operands by their rank. Use a
2380 // stable_sort so that values with equal ranks will have their relative
2381 // positions maintained (and so the compiler is deterministic). Note that
2382 // this sorts so that the highest ranking values end up at the beginning of
2383 // the vector.
2384 llvm::stable_sort(Ops);
2385
2386 // Now that we have the expression tree in a convenient
2387 // sorted form, optimize it globally if possible.
2388 if (Value *V = OptimizeExpression(I, Ops)) {
2389 if (V == I)
2390 // Self-referential expression in unreachable code.
2391 return;
2392 // This expression tree simplified to something that isn't a tree,
2393 // eliminate it.
2394 LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
2395 I->replaceAllUsesWith(V);
2396 if (Instruction *VI = dyn_cast<Instruction>(V))
2397 if (I->getDebugLoc())
2398 VI->setDebugLoc(I->getDebugLoc());
2399 RedoInsts.insert(I);
2400 ++NumAnnihil;
2401 return;
2402 }
2403
2404 // We want to sink immediates as deeply as possible except in the case where
2405 // this is a multiply tree used only by an add, and the immediate is a -1.
2406 // In this case we reassociate to put the negation on the outside so that we
2407 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2408 if (I->hasOneUse()) {
2409 if (I->getOpcode() == Instruction::Mul &&
2410 cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2411 isa<ConstantInt>(Ops.back().Op) &&
2412 cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
2413 ValueEntry Tmp = Ops.pop_back_val();
2414 Ops.insert(Ops.begin(), Tmp);
2415 } else if (I->getOpcode() == Instruction::FMul &&
2416 cast<Instruction>(I->user_back())->getOpcode() ==
2417 Instruction::FAdd &&
2418 isa<ConstantFP>(Ops.back().Op) &&
2419 cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
2420 ValueEntry Tmp = Ops.pop_back_val();
2421 Ops.insert(Ops.begin(), Tmp);
2422 }
2423 }
2424
2425 LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2426
2427 if (Ops.size() == 1) {
2428 if (Ops[0].Op == I)
2429 // Self-referential expression in unreachable code.
2430 return;
2431
2432 // This expression tree simplified to something that isn't a tree,
2433 // eliminate it.
2434 I->replaceAllUsesWith(Ops[0].Op);
2435 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2436 OI->setDebugLoc(I->getDebugLoc());
2437 RedoInsts.insert(I);
2438 return;
2439 }
2440
2441 if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2442 // Find the pair with the highest count in the pairmap and move it to the
2443 // back of the list so that it can later be CSE'd.
2444 // example:
2445 // a*b*c*d*e
2446 // if c*e is the most "popular" pair, we can express this as
2447 // (((c*e)*d)*b)*a
2448 unsigned Max = 1;
2449 unsigned BestRank = 0;
2450 std::pair<unsigned, unsigned> BestPair;
2451 unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2452 unsigned LimitIdx = 0;
2453 // With the CSE-driven heuristic, we are about to slap two values at the
2454 // beginning of the expression whereas they could live very late in the CFG.
2455 // When using the CSE-local heuristic we avoid creating dependences from
2456 // completely unrelated part of the CFG by limiting the expression
2457 // reordering on the values that live in the first seen basic block.
2458 // The main idea is that we want to avoid forming expressions that would
2459 // become loop dependent.
2460 if (UseCSELocalOpt) {
2461 const BasicBlock *FirstSeenBB = nullptr;
2462 int StartIdx = Ops.size() - 1;
2463 // Skip the first value of the expression since we need at least two
2464 // values to materialize an expression. I.e., even if this value is
2465 // anchored in a different basic block, the actual first sub expression
2466 // will be anchored on the second value.
2467 for (int i = StartIdx - 1; i != -1; --i) {
2468 const Value *Val = Ops[i].Op;
2469 const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
2470 const BasicBlock *SeenBB = nullptr;
2471 if (!CurrLeafInstr) {
2472 // The value is free of any CFG dependencies.
2473 // Do as if it lives in the entry block.
2474 //
2475 // We do this to make sure all the values falling on this path are
2476 // seen through the same anchor point. The rationale is these values
2477 // can be combined together to from a sub expression free of any CFG
2478 // dependencies so we want them to stay together.
2479 // We could be cleverer and postpone the anchor down to the first
2480 // anchored value, but that's likely complicated to get right.
2481 // E.g., we wouldn't want to do that if that means being stuck in a
2482 // loop.
2483 //
2484 // For instance, we wouldn't want to change:
2485 // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
2486 // into
2487 // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
2488 // Because all the sub expressions with arg2..N would be stuck between
2489 // two loop dependent values.
2490 SeenBB = &I->getParent()->getParent()->getEntryBlock();
2491 } else {
2492 SeenBB = CurrLeafInstr->getParent();
2493 }
2494
2495 if (!FirstSeenBB) {
2496 FirstSeenBB = SeenBB;
2497 continue;
2498 }
2499 if (FirstSeenBB != SeenBB) {
2500 // ith value is in a different basic block.
2501 // Rewind the index once to point to the last value on the same basic
2502 // block.
2503 LimitIdx = i + 1;
2504 LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
2505 << LimitIdx << ", " << StartIdx << "]\n");
2506 break;
2507 }
2508 }
2509 }
2510 for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
2511 // We must use int type to go below zero when LimitIdx is 0.
2512 for (int j = i - 1; j >= (int)LimitIdx; --j) {
2513 unsigned Score = 0;
2514 Value *Op0 = Ops[i].Op;
2515 Value *Op1 = Ops[j].Op;
2516 if (std::less<Value *>()(Op1, Op0))
2517 std::swap(Op0, Op1);
2518 auto it = PairMap[Idx].find({Op0, Op1});
2519 if (it != PairMap[Idx].end()) {
2520 // Functions like BreakUpSubtract() can erase the Values we're using
2521 // as keys and create new Values after we built the PairMap. There's a
2522 // small chance that the new nodes can have the same address as
2523 // something already in the table. We shouldn't accumulate the stored
2524 // score in that case as it refers to the wrong Value.
2525 if (it->second.isValid())
2526 Score += it->second.Score;
2527 }
2528
2529 unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
2530
2531 // By construction, the operands are sorted in reverse order of their
2532 // topological order.
2533 // So we tend to form (sub) expressions with values that are close to
2534 // each other.
2535 //
2536 // Now to expose more CSE opportunities we want to expose the pair of
2537 // operands that occur the most (as statically computed in
2538 // BuildPairMap.) as the first sub-expression.
2539 //
2540 // If two pairs occur as many times, we pick the one with the
2541 // lowest rank, meaning the one with both operands appearing first in
2542 // the topological order.
2543 if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2544 BestPair = {j, i};
2545 Max = Score;
2546 BestRank = MaxRank;
2547 }
2548 }
2549 }
2550 if (Max > 1) {
2551 auto Op0 = Ops[BestPair.first];
2552 auto Op1 = Ops[BestPair.second];
2553 Ops.erase(&Ops[BestPair.second]);
2554 Ops.erase(&Ops[BestPair.first]);
2555 Ops.push_back(Op0);
2556 Ops.push_back(Op1);
2557 }
2558 }
2559 LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
2560 dbgs() << '\n');
2561 // Now that we ordered and optimized the expressions, splat them back into
2562 // the expression tree, removing any unneeded nodes.
2563 RewriteExprTree(I, Ops, HasNUW);
2564}
2565
2566void
2567ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2568 // Make a "pairmap" of how often each operand pair occurs.
2569 for (BasicBlock *BI : RPOT) {
2570 for (Instruction &I : *BI) {
2571 if (!I.isAssociative() || !I.isBinaryOp())
2572 continue;
2573
2574 // Ignore nodes that aren't at the root of trees.
2575 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2576 continue;
2577
2578 // Collect all operands in a single reassociable expression.
2579 // Since Reassociate has already been run once, we can assume things
2580 // are already canonical according to Reassociation's regime.
2581 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
2583 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2584 Value *Op = Worklist.pop_back_val();
2585 Instruction *OpI = dyn_cast<Instruction>(Op);
2586 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2587 Ops.push_back(Op);
2588 continue;
2589 }
2590 // Be paranoid about self-referencing expressions in unreachable code.
2591 if (OpI->getOperand(0) != OpI)
2592 Worklist.push_back(OpI->getOperand(0));
2593 if (OpI->getOperand(1) != OpI)
2594 Worklist.push_back(OpI->getOperand(1));
2595 }
2596 // Skip extremely long expressions.
2597 if (Ops.size() > GlobalReassociateLimit)
2598 continue;
2599
2600 // Add all pairwise combinations of operands to the pair map.
2601 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2603 for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2604 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2605 // Canonicalize operand orderings.
2606 Value *Op0 = Ops[i];
2607 Value *Op1 = Ops[j];
2608 if (std::less<Value *>()(Op1, Op0))
2609 std::swap(Op0, Op1);
2610 if (!Visited.insert({Op0, Op1}).second)
2611 continue;
2612 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2613 if (!res.second) {
2614 // If either key value has been erased then we've got the same
2615 // address by coincidence. That can't happen here because nothing is
2616 // erasing values but it can happen by the time we're querying the
2617 // map.
2618 assert(res.first->second.isValid() && "WeakVH invalidated");
2619 ++res.first->second.Score;
2620 }
2621 }
2622 }
2623 }
2624 }
2625}
2626
2628 // Get the functions basic blocks in Reverse Post Order. This order is used by
2629 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2630 // blocks (it has been seen that the analysis in this pass could hang when
2631 // analysing dead basic blocks).
2633
2634 // Calculate the rank map for F.
2635 BuildRankMap(F, RPOT);
2636
2637 // Build the pair map before running reassociate.
2638 // Technically this would be more accurate if we did it after one round
2639 // of reassociation, but in practice it doesn't seem to help much on
2640 // real-world code, so don't waste the compile time running reassociate
2641 // twice.
2642 // If a user wants, they could expicitly run reassociate twice in their
2643 // pass pipeline for further potential gains.
2644 // It might also be possible to update the pair map during runtime, but the
2645 // overhead of that may be large if there's many reassociable chains.
2646 BuildPairMap(RPOT);
2647
2648 MadeChange = false;
2649
2650 // Traverse the same blocks that were analysed by BuildRankMap.
2651 for (BasicBlock *BI : RPOT) {
2652 assert(RankMap.count(&*BI) && "BB should be ranked.");
2653 // Optimize every instruction in the basic block.
2654 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2655 if (isInstructionTriviallyDead(&*II)) {
2656 EraseInst(&*II++);
2657 } else {
2658 OptimizeInst(&*II);
2659 assert(II->getParent() == &*BI && "Moved to a different block!");
2660 ++II;
2661 }
2662
2663 // Make a copy of all the instructions to be redone so we can remove dead
2664 // instructions.
2665 OrderedSet ToRedo(RedoInsts);
2666 // Iterate over all instructions to be reevaluated and remove trivially dead
2667 // instructions. If any operand of the trivially dead instruction becomes
2668 // dead mark it for deletion as well. Continue this process until all
2669 // trivially dead instructions have been removed.
2670 while (!ToRedo.empty()) {
2671 Instruction *I = ToRedo.pop_back_val();
2673 RecursivelyEraseDeadInsts(I, ToRedo);
2674 MadeChange = true;
2675 }
2676 }
2677
2678 // Now that we have removed dead instructions, we can reoptimize the
2679 // remaining instructions.
2680 while (!RedoInsts.empty()) {
2681 Instruction *I = RedoInsts.front();
2682 RedoInsts.erase(RedoInsts.begin());
2684 EraseInst(I);
2685 else
2686 OptimizeInst(I);
2687 }
2688 }
2689
2690 // We are done with the rank map and pair map.
2691 RankMap.clear();
2692 ValueRankMap.clear();
2693 for (auto &Entry : PairMap)
2694 Entry.clear();
2695
2696 if (MadeChange) {
2699 return PA;
2700 }
2701
2702 return PreservedAnalyses::all();
2703}
2704
2705namespace {
2706
2707 class ReassociateLegacyPass : public FunctionPass {
2708 ReassociatePass Impl;
2709
2710 public:
2711 static char ID; // Pass identification, replacement for typeid
2712
2713 ReassociateLegacyPass() : FunctionPass(ID) {
2715 }
2716
2717 bool runOnFunction(Function &F) override {
2718 if (skipFunction(F))
2719 return false;
2720
2721 FunctionAnalysisManager DummyFAM;
2722 auto PA = Impl.run(F, DummyFAM);
2723 return !PA.areAllPreserved();
2724 }
2725
2726 void getAnalysisUsage(AnalysisUsage &AU) const override {
2727 AU.setPreservesCFG();
2731 }
2732 };
2733
2734} // end anonymous namespace
2735
2736char ReassociateLegacyPass::ID = 0;
2737
2738INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2739 "Reassociate expressions", false, false)
2740
2741// Public interface to the Reassociate pass
2743 return new ReassociateLegacyPass();
2744}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition: IVUsers.cpp:56
static bool isReassociableOp(Instruction *I, unsigned IntOpcode, unsigned FPOpcode)
Definition: LICM.cpp:2673
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
nary reassociate
#define P(N)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:82
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
static unsigned CarmichaelShift(unsigned Bitwidth)
Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael function.
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
static Instruction * CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
static cl::opt< bool > UseCSELocalOpt(DEBUG_TYPE "-use-cse-local", cl::desc("Only reorder expressions within a basic block " "when exposing CSE opportunities"), cl::init(true), cl::Hidden)
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode)
Add the extra weight 'RHS' to the existing weight 'LHS', reducing the combined weight using any speci...
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops, ReassociatePass::OrderedSet &ToRedo, bool &HasNUW)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
std::pair< Value *, APInt > RepeatedValue
static bool hasFPAssociativeFlags(Instruction *I)
Return true if I is an instruction with the FastMathFlags that are needed for general reassociation s...
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
static Value * createAndInstr(Instruction *InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
#define DEBUG_TYPE
Definition: Reassociate.cpp:68
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static bool isLoadCombineCandidate(Instruction *Or)
static Value * EmitAddTreeOfValues(Instruction *I, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:76
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:395
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:214
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:422
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:173
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition: InstrTypes.h:402
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
static Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2669
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2562
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2598
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2525
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:267
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const BasicBlock & getEntryBlock() const
Definition: Function.h:779
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1541
Value * CreateFSubFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1568
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:305
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1581
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1355
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:452
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:71
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const BasicBlock * getParent() const
Definition: Instruction.h:150
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:147
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:75
bool isNilpotent() const
Return true if the instruction is nilpotent:
Definition: Instruction.h:718
const char * getOpcodeName() const
Definition: Instruction.h:252
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:250
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:449
bool isIdempotent() const
Return true if the instruction is idempotent:
Definition: Instruction.h:704
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:275
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: Analysis.h:281
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
Reassociate commutative expressions.
Definition: Reassociate.h:71
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:163
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:544
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:110
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
self_iterator getIterator()
Definition: ilist_node.h:109
Utility class representing a non-constant Xor-operand.
Value * getSymbolicPart() const
unsigned getSymbolicRank() const
void setSymbolicRank(unsigned R)
const APInt & getConstPart() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:724
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:988
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:295
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:237
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
void stable_sort(R &&Range)
Definition: STLExtras.h:1975
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1582
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1381
FunctionPass * createReassociatePass()
void initializeReassociateLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:610
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1950
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:59