LLVM 17.0.0git
Reassociate.cpp
Go to the documentation of this file.
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass reassociates commutative expressions in an order that is designed
10// to promote better constant propagation, GCSE, LICM, PRE, etc.
11//
12// For example: 4 + (x + 5) -> x + (4 + 5)
13//
14// In the implementation of this algorithm, constants are assigned rank = 0,
15// function arguments are rank = 1, and other values are assigned ranks
16// corresponding to the reverse post order traversal of current function
17// (starting at 2), which effectively gives values in deep loops higher rank
18// than values not in loops.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Operator.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/IR/ValueHandle.h"
53#include "llvm/Pass.h"
55#include "llvm/Support/Debug.h"
59#include <algorithm>
60#include <cassert>
61#include <utility>
62
63using namespace llvm;
64using namespace reassociate;
65using namespace PatternMatch;
66
67#define DEBUG_TYPE "reassociate"
68
69STATISTIC(NumChanged, "Number of insts reassociated");
70STATISTIC(NumAnnihil, "Number of expr tree annihilated");
71STATISTIC(NumFactor , "Number of multiplies factored");
72
73#ifndef NDEBUG
74/// Print out the expression identified in the Ops list.
76 Module *M = I->getModule();
77 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
78 << *Ops[0].Op->getType() << '\t';
79 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
80 dbgs() << "[ ";
81 Ops[i].Op->printAsOperand(dbgs(), false, M);
82 dbgs() << ", #" << Ops[i].Rank << "] ";
83 }
84}
85#endif
86
87/// Utility class representing a non-constant Xor-operand. We classify
88/// non-constant Xor-Operands into two categories:
89/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
90/// C2)
91/// C2.1) The operand is in the form of "X | C", where C is a non-zero
92/// constant.
93/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
94/// operand as "E | 0"
96public:
97 XorOpnd(Value *V);
98
99 bool isInvalid() const { return SymbolicPart == nullptr; }
100 bool isOrExpr() const { return isOr; }
101 Value *getValue() const { return OrigVal; }
102 Value *getSymbolicPart() const { return SymbolicPart; }
103 unsigned getSymbolicRank() const { return SymbolicRank; }
104 const APInt &getConstPart() const { return ConstPart; }
105
106 void Invalidate() { SymbolicPart = OrigVal = nullptr; }
107 void setSymbolicRank(unsigned R) { SymbolicRank = R; }
108
109private:
110 Value *OrigVal;
111 Value *SymbolicPart;
112 APInt ConstPart;
113 unsigned SymbolicRank;
114 bool isOr;
115};
116
118 assert(!isa<ConstantInt>(V) && "No ConstantInt");
119 OrigVal = V;
120 Instruction *I = dyn_cast<Instruction>(V);
121 SymbolicRank = 0;
122
123 if (I && (I->getOpcode() == Instruction::Or ||
124 I->getOpcode() == Instruction::And)) {
125 Value *V0 = I->getOperand(0);
126 Value *V1 = I->getOperand(1);
127 const APInt *C;
128 if (match(V0, m_APInt(C)))
129 std::swap(V0, V1);
130
131 if (match(V1, m_APInt(C))) {
132 ConstPart = *C;
133 SymbolicPart = V0;
134 isOr = (I->getOpcode() == Instruction::Or);
135 return;
136 }
137 }
138
139 // view the operand as "V | 0"
140 SymbolicPart = V;
141 ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
142 isOr = true;
143}
144
145/// Return true if I is an instruction with the FastMathFlags that are needed
146/// for general reassociation set. This is not the same as testing
147/// Instruction::isAssociative() because it includes operations like fsub.
148/// (This routine is only intended to be called for floating-point operations.)
150 assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
151 return I->hasAllowReassoc() && I->hasNoSignedZeros();
152}
153
154/// Return true if V is an instruction of the specified opcode and if it
155/// only has one use.
156static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
157 auto *BO = dyn_cast<BinaryOperator>(V);
158 if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
159 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
160 return BO;
161 return nullptr;
162}
163
164static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
165 unsigned Opcode2) {
166 auto *BO = dyn_cast<BinaryOperator>(V);
167 if (BO && BO->hasOneUse() &&
168 (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
169 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
170 return BO;
171 return nullptr;
172}
173
174void ReassociatePass::BuildRankMap(Function &F,
176 unsigned Rank = 2;
177
178 // Assign distinct ranks to function arguments.
179 for (auto &Arg : F.args()) {
180 ValueRankMap[&Arg] = ++Rank;
181 LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
182 << "\n");
183 }
184
185 // Traverse basic blocks in ReversePostOrder.
186 for (BasicBlock *BB : RPOT) {
187 unsigned BBRank = RankMap[BB] = ++Rank << 16;
188
189 // Walk the basic block, adding precomputed ranks for any instructions that
190 // we cannot move. This ensures that the ranks for these instructions are
191 // all different in the block.
192 for (Instruction &I : *BB)
194 ValueRankMap[&I] = ++BBRank;
195 }
196}
197
198unsigned ReassociatePass::getRank(Value *V) {
199 Instruction *I = dyn_cast<Instruction>(V);
200 if (!I) {
201 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
202 return 0; // Otherwise it's a global or constant, rank 0.
203 }
204
205 if (unsigned Rank = ValueRankMap[I])
206 return Rank; // Rank already known?
207
208 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
209 // we can reassociate expressions for code motion! Since we do not recurse
210 // for PHI nodes, we cannot have infinite recursion here, because there
211 // cannot be loops in the value graph that do not go through PHI nodes.
212 unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
213 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
214 Rank = std::max(Rank, getRank(I->getOperand(i)));
215
216 // If this is a 'not' or 'neg' instruction, do not count it for rank. This
217 // assures us that X and ~X will have the same rank.
218 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
219 !match(I, m_FNeg(m_Value())))
220 ++Rank;
221
222 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
223 << "\n");
224
225 return ValueRankMap[I] = Rank;
226}
227
228// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
229void ReassociatePass::canonicalizeOperands(Instruction *I) {
230 assert(isa<BinaryOperator>(I) && "Expected binary operator.");
231 assert(I->isCommutative() && "Expected commutative operator.");
232
233 Value *LHS = I->getOperand(0);
234 Value *RHS = I->getOperand(1);
235 if (LHS == RHS || isa<Constant>(RHS))
236 return;
237 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
238 cast<BinaryOperator>(I)->swapOperands();
239}
240
241static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
242 Instruction *InsertBefore, Value *FlagsOp) {
243 if (S1->getType()->isIntOrIntVectorTy())
244 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
245 else {
246 BinaryOperator *Res =
247 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
248 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
249 return Res;
250 }
251}
252
253static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
254 Instruction *InsertBefore, Value *FlagsOp) {
255 if (S1->getType()->isIntOrIntVectorTy())
256 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
257 else {
258 BinaryOperator *Res =
259 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
260 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
261 return Res;
262 }
263}
264
266 Instruction *InsertBefore, Value *FlagsOp) {
267 if (S1->getType()->isIntOrIntVectorTy())
268 return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
269
270 if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
271 return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
272
273 return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
274}
275
276/// Replace 0-X with X*-1.
278 assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
279 "Expected a Negate!");
280 // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
281 unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
282 Type *Ty = Neg->getType();
283 Constant *NegOne = Ty->isIntOrIntVectorTy() ?
285
286 BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
287 Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
288 Res->takeName(Neg);
289 Neg->replaceAllUsesWith(Res);
290 Res->setDebugLoc(Neg->getDebugLoc());
291 return Res;
292}
293
294/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
295/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
296/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
297/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
298/// even x in Bitwidth-bit arithmetic.
299static unsigned CarmichaelShift(unsigned Bitwidth) {
300 if (Bitwidth < 3)
301 return Bitwidth - 1;
302 return Bitwidth - 2;
303}
304
305/// Add the extra weight 'RHS' to the existing weight 'LHS',
306/// reducing the combined weight using any special properties of the operation.
307/// The existing weight LHS represents the computation X op X op ... op X where
308/// X occurs LHS times. The combined weight represents X op X op ... op X with
309/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
310/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
311/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
312static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
313 // If we were working with infinite precision arithmetic then the combined
314 // weight would be LHS + RHS. But we are using finite precision arithmetic,
315 // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
316 // for nilpotent operations and addition, but not for idempotent operations
317 // and multiplication), so it is important to correctly reduce the combined
318 // weight back into range if wrapping would be wrong.
319
320 // If RHS is zero then the weight didn't change.
321 if (RHS.isMinValue())
322 return;
323 // If LHS is zero then the combined weight is RHS.
324 if (LHS.isMinValue()) {
325 LHS = RHS;
326 return;
327 }
328 // From this point on we know that neither LHS nor RHS is zero.
329
330 if (Instruction::isIdempotent(Opcode)) {
331 // Idempotent means X op X === X, so any non-zero weight is equivalent to a
332 // weight of 1. Keeping weights at zero or one also means that wrapping is
333 // not a problem.
334 assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
335 return; // Return a weight of 1.
336 }
337 if (Instruction::isNilpotent(Opcode)) {
338 // Nilpotent means X op X === 0, so reduce weights modulo 2.
339 assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
340 LHS = 0; // 1 + 1 === 0 modulo 2.
341 return;
342 }
343 if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
344 // TODO: Reduce the weight by exploiting nsw/nuw?
345 LHS += RHS;
346 return;
347 }
348
349 assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
350 "Unknown associative operation!");
351 unsigned Bitwidth = LHS.getBitWidth();
352 // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
353 // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
354 // bit number x, since either x is odd in which case x^CM = 1, or x is even in
355 // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
356 // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
357 // which by a happy accident means that they can always be represented using
358 // Bitwidth bits.
359 // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
360 // the Carmichael number).
361 if (Bitwidth > 3) {
362 /// CM - The value of Carmichael's lambda function.
363 APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
364 // Any weight W >= Threshold can be replaced with W - CM.
365 APInt Threshold = CM + Bitwidth;
366 assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
367 // For Bitwidth 4 or more the following sum does not overflow.
368 LHS += RHS;
369 while (LHS.uge(Threshold))
370 LHS -= CM;
371 } else {
372 // To avoid problems with overflow do everything the same as above but using
373 // a larger type.
374 unsigned CM = 1U << CarmichaelShift(Bitwidth);
375 unsigned Threshold = CM + Bitwidth;
376 assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
377 "Weights not reduced!");
378 unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
379 while (Total >= Threshold)
380 Total -= CM;
381 LHS = Total;
382 }
383}
384
385using RepeatedValue = std::pair<Value*, APInt>;
386
387/// Given an associative binary expression, return the leaf
388/// nodes in Ops along with their weights (how many times the leaf occurs). The
389/// original expression is the same as
390/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
391/// op
392/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
393/// op
394/// ...
395/// op
396/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
397///
398/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
399///
400/// This routine may modify the function, in which case it returns 'true'. The
401/// changes it makes may well be destructive, changing the value computed by 'I'
402/// to something completely different. Thus if the routine returns 'true' then
403/// you MUST either replace I with a new expression computed from the Ops array,
404/// or use RewriteExprTree to put the values back in.
405///
406/// A leaf node is either not a binary operation of the same kind as the root
407/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
408/// opcode), or is the same kind of binary operator but has a use which either
409/// does not belong to the expression, or does belong to the expression but is
410/// a leaf node. Every leaf node has at least one use that is a non-leaf node
411/// of the expression, while for non-leaf nodes (except for the root 'I') every
412/// use is a non-leaf node of the expression.
413///
414/// For example:
415/// expression graph node names
416///
417/// + | I
418/// / \ |
419/// + + | A, B
420/// / \ / \ |
421/// * + * | C, D, E
422/// / \ / \ / \ |
423/// + * | F, G
424///
425/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
426/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
427///
428/// The expression is maximal: if some instruction is a binary operator of the
429/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
430/// then the instruction also belongs to the expression, is not a leaf node of
431/// it, and its operands also belong to the expression (but may be leaf nodes).
432///
433/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
434/// order to ensure that every non-root node in the expression has *exactly one*
435/// use by a non-leaf node of the expression. This destruction means that the
436/// caller MUST either replace 'I' with a new expression or use something like
437/// RewriteExprTree to put the values back in if the routine indicates that it
438/// made a change by returning 'true'.
439///
440/// In the above example either the right operand of A or the left operand of B
441/// will be replaced by undef. If it is B's operand then this gives:
442///
443/// + | I
444/// / \ |
445/// + + | A, B - operand of B replaced with undef
446/// / \ \ |
447/// * + * | C, D, E
448/// / \ / \ / \ |
449/// + * | F, G
450///
451/// Note that such undef operands can only be reached by passing through 'I'.
452/// For example, if you visit operands recursively starting from a leaf node
453/// then you will never see such an undef operand unless you get back to 'I',
454/// which requires passing through a phi node.
455///
456/// Note that this routine may also mutate binary operators of the wrong type
457/// that have all uses inside the expression (i.e. only used by non-leaf nodes
458/// of the expression) if it can turn them into binary operators of the right
459/// type and thus make the expression bigger.
463 assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
464 "Expected a UnaryOperator or BinaryOperator!");
465 LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
466 unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
467 unsigned Opcode = I->getOpcode();
468 assert(I->isAssociative() && I->isCommutative() &&
469 "Expected an associative and commutative operation!");
470
471 // Visit all operands of the expression, keeping track of their weight (the
472 // number of paths from the expression root to the operand, or if you like
473 // the number of times that operand occurs in the linearized expression).
474 // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
475 // while A has weight two.
476
477 // Worklist of non-leaf nodes (their operands are in the expression too) along
478 // with their weights, representing a certain number of paths to the operator.
479 // If an operator occurs in the worklist multiple times then we found multiple
480 // ways to get to it.
481 SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
482 Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
483 bool Changed = false;
484
485 // Leaves of the expression are values that either aren't the right kind of
486 // operation (eg: a constant, or a multiply in an add tree), or are, but have
487 // some uses that are not inside the expression. For example, in I = X + X,
488 // X = A + B, the value X has two uses (by I) that are in the expression. If
489 // X has any other uses, for example in a return instruction, then we consider
490 // X to be a leaf, and won't analyze it further. When we first visit a value,
491 // if it has more than one use then at first we conservatively consider it to
492 // be a leaf. Later, as the expression is explored, we may discover some more
493 // uses of the value from inside the expression. If all uses turn out to be
494 // from within the expression (and the value is a binary operator of the right
495 // kind) then the value is no longer considered to be a leaf, and its operands
496 // are explored.
497
498 // Leaves - Keeps track of the set of putative leaves as well as the number of
499 // paths to each leaf seen so far.
500 using LeafMap = DenseMap<Value *, APInt>;
501 LeafMap Leaves; // Leaf -> Total weight so far.
502 SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
503
504#ifndef NDEBUG
505 SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
506#endif
507 while (!Worklist.empty()) {
508 std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
509 I = P.first; // We examine the operands of this binary operator.
510
511 for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
512 Value *Op = I->getOperand(OpIdx);
513 APInt Weight = P.second; // Number of paths to this operand.
514 LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
515 assert(!Op->use_empty() && "No uses, so how did we get to it?!");
516
517 // If this is a binary operation of the right kind with only one use then
518 // add its operands to the expression.
519 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
520 assert(Visited.insert(Op).second && "Not first visit!");
521 LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
522 Worklist.push_back(std::make_pair(BO, Weight));
523 continue;
524 }
525
526 // Appears to be a leaf. Is the operand already in the set of leaves?
527 LeafMap::iterator It = Leaves.find(Op);
528 if (It == Leaves.end()) {
529 // Not in the leaf map. Must be the first time we saw this operand.
530 assert(Visited.insert(Op).second && "Not first visit!");
531 if (!Op->hasOneUse()) {
532 // This value has uses not accounted for by the expression, so it is
533 // not safe to modify. Mark it as being a leaf.
535 << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
536 LeafOrder.push_back(Op);
537 Leaves[Op] = Weight;
538 continue;
539 }
540 // No uses outside the expression, try morphing it.
541 } else {
542 // Already in the leaf map.
543 assert(It != Leaves.end() && Visited.count(Op) &&
544 "In leaf map but not visited!");
545
546 // Update the number of paths to the leaf.
547 IncorporateWeight(It->second, Weight, Opcode);
548
549#if 0 // TODO: Re-enable once PR13021 is fixed.
550 // The leaf already has one use from inside the expression. As we want
551 // exactly one such use, drop this new use of the leaf.
552 assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
553 I->setOperand(OpIdx, UndefValue::get(I->getType()));
554 Changed = true;
555
556 // If the leaf is a binary operation of the right kind and we now see
557 // that its multiple original uses were in fact all by nodes belonging
558 // to the expression, then no longer consider it to be a leaf and add
559 // its operands to the expression.
560 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
561 LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
562 Worklist.push_back(std::make_pair(BO, It->second));
563 Leaves.erase(It);
564 continue;
565 }
566#endif
567
568 // If we still have uses that are not accounted for by the expression
569 // then it is not safe to modify the value.
570 if (!Op->hasOneUse())
571 continue;
572
573 // No uses outside the expression, try morphing it.
574 Weight = It->second;
575 Leaves.erase(It); // Since the value may be morphed below.
576 }
577
578 // At this point we have a value which, first of all, is not a binary
579 // expression of the right kind, and secondly, is only used inside the
580 // expression. This means that it can safely be modified. See if we
581 // can usefully morph it into an expression of the right kind.
582 assert((!isa<Instruction>(Op) ||
583 cast<Instruction>(Op)->getOpcode() != Opcode
584 || (isa<FPMathOperator>(Op) &&
585 !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
586 "Should have been handled above!");
587 assert(Op->hasOneUse() && "Has uses outside the expression tree!");
588
589 // If this is a multiply expression, turn any internal negations into
590 // multiplies by -1 so they can be reassociated. Add any users of the
591 // newly created multiplication by -1 to the redo list, so any
592 // reassociation opportunities that are exposed will be reassociated
593 // further.
594 Instruction *Neg;
595 if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
596 (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
597 match(Op, m_Instruction(Neg))) {
599 << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
601 LLVM_DEBUG(dbgs() << *Mul << '\n');
602 Worklist.push_back(std::make_pair(Mul, Weight));
603 for (User *U : Mul->users()) {
604 if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
605 ToRedo.insert(UserBO);
606 }
607 ToRedo.insert(Neg);
608 Changed = true;
609 continue;
610 }
611
612 // Failed to morph into an expression of the right type. This really is
613 // a leaf.
614 LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
615 assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
616 LeafOrder.push_back(Op);
617 Leaves[Op] = Weight;
618 }
619 }
620
621 // The leaves, repeated according to their weights, represent the linearized
622 // form of the expression.
623 for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
624 Value *V = LeafOrder[i];
625 LeafMap::iterator It = Leaves.find(V);
626 if (It == Leaves.end())
627 // Node initially thought to be a leaf wasn't.
628 continue;
629 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
630 APInt Weight = It->second;
631 if (Weight.isMinValue())
632 // Leaf already output or weight reduction eliminated it.
633 continue;
634 // Ensure the leaf is only output once.
635 It->second = 0;
636 Ops.push_back(std::make_pair(V, Weight));
637 }
638
639 // For nilpotent operations or addition there may be no operands, for example
640 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
641 // in both cases the weight reduces to 0 causing the value to be skipped.
642 if (Ops.empty()) {
643 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
644 assert(Identity && "Associative operation without identity!");
645 Ops.emplace_back(Identity, APInt(Bitwidth, 1));
646 }
647
648 return Changed;
649}
650
651/// Now that the operands for this expression tree are
652/// linearized and optimized, emit them in-order.
653void ReassociatePass::RewriteExprTree(BinaryOperator *I,
655 assert(Ops.size() > 1 && "Single values should be used directly!");
656
657 // Since our optimizations should never increase the number of operations, the
658 // new expression can usually be written reusing the existing binary operators
659 // from the original expression tree, without creating any new instructions,
660 // though the rewritten expression may have a completely different topology.
661 // We take care to not change anything if the new expression will be the same
662 // as the original. If more than trivial changes (like commuting operands)
663 // were made then we are obliged to clear out any optional subclass data like
664 // nsw flags.
665
666 /// NodesToRewrite - Nodes from the original expression available for writing
667 /// the new expression into.
668 SmallVector<BinaryOperator*, 8> NodesToRewrite;
669 unsigned Opcode = I->getOpcode();
671
672 /// NotRewritable - The operands being written will be the leaves of the new
673 /// expression and must not be used as inner nodes (via NodesToRewrite) by
674 /// mistake. Inner nodes are always reassociable, and usually leaves are not
675 /// (if they were they would have been incorporated into the expression and so
676 /// would not be leaves), so most of the time there is no danger of this. But
677 /// in rare cases a leaf may become reassociable if an optimization kills uses
678 /// of it, or it may momentarily become reassociable during rewriting (below)
679 /// due it being removed as an operand of one of its uses. Ensure that misuse
680 /// of leaf nodes as inner nodes cannot occur by remembering all of the future
681 /// leaves and refusing to reuse any of them as inner nodes.
682 SmallPtrSet<Value*, 8> NotRewritable;
683 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
684 NotRewritable.insert(Ops[i].Op);
685
686 // ExpressionChanged - Non-null if the rewritten expression differs from the
687 // original in some non-trivial way, requiring the clearing of optional flags.
688 // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
689 BinaryOperator *ExpressionChanged = nullptr;
690 for (unsigned i = 0; ; ++i) {
691 // The last operation (which comes earliest in the IR) is special as both
692 // operands will come from Ops, rather than just one with the other being
693 // a subexpression.
694 if (i+2 == Ops.size()) {
695 Value *NewLHS = Ops[i].Op;
696 Value *NewRHS = Ops[i+1].Op;
697 Value *OldLHS = Op->getOperand(0);
698 Value *OldRHS = Op->getOperand(1);
699
700 if (NewLHS == OldLHS && NewRHS == OldRHS)
701 // Nothing changed, leave it alone.
702 break;
703
704 if (NewLHS == OldRHS && NewRHS == OldLHS) {
705 // The order of the operands was reversed. Swap them.
706 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
707 Op->swapOperands();
708 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
709 MadeChange = true;
710 ++NumChanged;
711 break;
712 }
713
714 // The new operation differs non-trivially from the original. Overwrite
715 // the old operands with the new ones.
716 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
717 if (NewLHS != OldLHS) {
718 BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
719 if (BO && !NotRewritable.count(BO))
720 NodesToRewrite.push_back(BO);
721 Op->setOperand(0, NewLHS);
722 }
723 if (NewRHS != OldRHS) {
724 BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
725 if (BO && !NotRewritable.count(BO))
726 NodesToRewrite.push_back(BO);
727 Op->setOperand(1, NewRHS);
728 }
729 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
730
731 ExpressionChanged = Op;
732 MadeChange = true;
733 ++NumChanged;
734
735 break;
736 }
737
738 // Not the last operation. The left-hand side will be a sub-expression
739 // while the right-hand side will be the current element of Ops.
740 Value *NewRHS = Ops[i].Op;
741 if (NewRHS != Op->getOperand(1)) {
742 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
743 if (NewRHS == Op->getOperand(0)) {
744 // The new right-hand side was already present as the left operand. If
745 // we are lucky then swapping the operands will sort out both of them.
746 Op->swapOperands();
747 } else {
748 // Overwrite with the new right-hand side.
749 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
750 if (BO && !NotRewritable.count(BO))
751 NodesToRewrite.push_back(BO);
752 Op->setOperand(1, NewRHS);
753 ExpressionChanged = Op;
754 }
755 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
756 MadeChange = true;
757 ++NumChanged;
758 }
759
760 // Now deal with the left-hand side. If this is already an operation node
761 // from the original expression then just rewrite the rest of the expression
762 // into it.
763 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
764 if (BO && !NotRewritable.count(BO)) {
765 Op = BO;
766 continue;
767 }
768
769 // Otherwise, grab a spare node from the original expression and use that as
770 // the left-hand side. If there are no nodes left then the optimizers made
771 // an expression with more nodes than the original! This usually means that
772 // they did something stupid but it might mean that the problem was just too
773 // hard (finding the mimimal number of multiplications needed to realize a
774 // multiplication expression is NP-complete). Whatever the reason, smart or
775 // stupid, create a new node if there are none left.
776 BinaryOperator *NewOp;
777 if (NodesToRewrite.empty()) {
778 Constant *Undef = UndefValue::get(I->getType());
780 Undef, Undef, "", I);
781 if (isa<FPMathOperator>(NewOp))
782 NewOp->setFastMathFlags(I->getFastMathFlags());
783 } else {
784 NewOp = NodesToRewrite.pop_back_val();
785 }
786
787 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
788 Op->setOperand(0, NewOp);
789 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
790 ExpressionChanged = Op;
791 MadeChange = true;
792 ++NumChanged;
793 Op = NewOp;
794 }
795
796 // If the expression changed non-trivially then clear out all subclass data
797 // starting from the operator specified in ExpressionChanged, and compactify
798 // the operators to just before the expression root to guarantee that the
799 // expression tree is dominated by all of Ops.
800 if (ExpressionChanged)
801 do {
802 // Preserve FastMathFlags.
803 if (isa<FPMathOperator>(I)) {
804 FastMathFlags Flags = I->getFastMathFlags();
805 ExpressionChanged->clearSubclassOptionalData();
806 ExpressionChanged->setFastMathFlags(Flags);
807 } else
808 ExpressionChanged->clearSubclassOptionalData();
809
810 if (ExpressionChanged == I)
811 break;
812
813 // Discard any debug info related to the expressions that has changed (we
814 // can leave debug infor related to the root, since the result of the
815 // expression tree should be the same even after reassociation).
816 replaceDbgUsesWithUndef(ExpressionChanged);
817
818 ExpressionChanged->moveBefore(I);
819 ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
820 } while (true);
821
822 // Throw away any left over nodes from the original expression.
823 for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
824 RedoInsts.insert(NodesToRewrite[i]);
825}
826
827/// Insert instructions before the instruction pointed to by BI,
828/// that computes the negative version of the value specified. The negative
829/// version of the value is returned, and BI is left pointing at the instruction
830/// that should be processed next by the reassociation pass.
831/// Also add intermediate instructions to the redo list that are modified while
832/// pushing the negates through adds. These will be revisited to see if
833/// additional opportunities have been exposed.
836 if (auto *C = dyn_cast<Constant>(V)) {
837 const DataLayout &DL = BI->getModule()->getDataLayout();
838 Constant *Res = C->getType()->isFPOrFPVectorTy()
839 ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
841 if (Res)
842 return Res;
843 }
844
845 // We are trying to expose opportunity for reassociation. One of the things
846 // that we want to do to achieve this is to push a negation as deep into an
847 // expression chain as possible, to expose the add instructions. In practice,
848 // this means that we turn this:
849 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
850 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
851 // the constants. We assume that instcombine will clean up the mess later if
852 // we introduce tons of unnecessary negation instructions.
853 //
854 if (BinaryOperator *I =
855 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
856 // Push the negates through the add.
857 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
858 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
859 if (I->getOpcode() == Instruction::Add) {
860 I->setHasNoUnsignedWrap(false);
861 I->setHasNoSignedWrap(false);
862 }
863
864 // We must move the add instruction here, because the neg instructions do
865 // not dominate the old add instruction in general. By moving it, we are
866 // assured that the neg instructions we just inserted dominate the
867 // instruction we are about to insert after them.
868 //
869 I->moveBefore(BI);
870 I->setName(I->getName()+".neg");
871
872 // Add the intermediate negates to the redo list as processing them later
873 // could expose more reassociating opportunities.
874 ToRedo.insert(I);
875 return I;
876 }
877
878 // Okay, we need to materialize a negated version of V with an instruction.
879 // Scan the use lists of V to see if we have one already.
880 for (User *U : V->users()) {
881 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
882 continue;
883
884 // We found one! Now we have to make sure that the definition dominates
885 // this use. We do this by moving it to the entry block (if it is a
886 // non-instruction value) or right after the definition. These negates will
887 // be zapped by reassociate later, so we don't need much finesse here.
888 Instruction *TheNeg = dyn_cast<Instruction>(U);
889
890 // We can't safely propagate a vector zero constant with poison/undef lanes.
891 Constant *C;
892 if (match(TheNeg, m_BinOp(m_Constant(C), m_Value())) &&
893 C->containsUndefOrPoisonElement())
894 continue;
895
896 // Verify that the negate is in this function, V might be a constant expr.
897 if (!TheNeg ||
898 TheNeg->getParent()->getParent() != BI->getParent()->getParent())
899 continue;
900
901 Instruction *InsertPt;
902 if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
903 InsertPt = InstInput->getInsertionPointAfterDef();
904 if (!InsertPt)
905 continue;
906 } else {
907 InsertPt = &*TheNeg->getFunction()->getEntryBlock().begin();
908 }
909
910 TheNeg->moveBefore(InsertPt);
911 if (TheNeg->getOpcode() == Instruction::Sub) {
912 TheNeg->setHasNoUnsignedWrap(false);
913 TheNeg->setHasNoSignedWrap(false);
914 } else {
915 TheNeg->andIRFlags(BI);
916 }
917 ToRedo.insert(TheNeg);
918 return TheNeg;
919 }
920
921 // Insert a 'neg' instruction that subtracts the value from zero to get the
922 // negation.
923 Instruction *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
924 ToRedo.insert(NewNeg);
925 return NewNeg;
926}
927
928// See if this `or` looks like an load widening reduction, i.e. that it
929// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
930// ensure that the pattern is *really* a load widening reduction,
931// we do not ensure that it can really be replaced with a widened load,
932// only that it mostly looks like one.
936
937 auto Enqueue = [&](Value *V) {
938 auto *I = dyn_cast<Instruction>(V);
939 // Each node of an `or` reduction must be an instruction,
940 if (!I)
941 return false; // Node is certainly not part of an `or` load reduction.
942 // Only process instructions we have never processed before.
943 if (Visited.insert(I).second)
944 Worklist.emplace_back(I);
945 return true; // Will need to look at parent nodes.
946 };
947
948 if (!Enqueue(Or))
949 return false; // Not an `or` reduction pattern.
950
951 while (!Worklist.empty()) {
952 auto *I = Worklist.pop_back_val();
953
954 // Okay, which instruction is this node?
955 switch (I->getOpcode()) {
956 case Instruction::Or:
957 // Got an `or` node. That's fine, just recurse into it's operands.
958 for (Value *Op : I->operands())
959 if (!Enqueue(Op))
960 return false; // Not an `or` reduction pattern.
961 continue;
962
963 case Instruction::Shl:
964 case Instruction::ZExt:
965 // `shl`/`zext` nodes are fine, just recurse into their base operand.
966 if (!Enqueue(I->getOperand(0)))
967 return false; // Not an `or` reduction pattern.
968 continue;
969
970 case Instruction::Load:
971 // Perfect, `load` node means we've reached an edge of the graph.
972 continue;
973
974 default: // Unknown node.
975 return false; // Not an `or` reduction pattern.
976 }
977 }
978
979 return true;
980}
981
982/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
984 // Don't bother to convert this up unless either the LHS is an associable add
985 // or subtract or mul or if this is only used by one of the above.
986 // This is only a compile-time improvement, it is not needed for correctness!
987 auto isInteresting = [](Value *V) {
988 for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
989 Instruction::Shl})
990 if (isReassociableOp(V, Op))
991 return true;
992 return false;
993 };
994
995 if (any_of(Or->operands(), isInteresting))
996 return true;
997
998 Value *VB = Or->user_back();
999 if (Or->hasOneUse() && isInteresting(VB))
1000 return true;
1001
1002 return false;
1003}
1004
1005/// If we have (X|Y), and iff X and Y have no common bits set,
1006/// transform this into (X+Y) to allow arithmetics reassociation.
1008 // Convert an or into an add.
1009 BinaryOperator *New =
1010 CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
1011 New->setHasNoSignedWrap();
1012 New->setHasNoUnsignedWrap();
1013 New->takeName(Or);
1014
1015 // Everyone now refers to the add instruction.
1016 Or->replaceAllUsesWith(New);
1017 New->setDebugLoc(Or->getDebugLoc());
1018
1019 LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
1020 return New;
1021}
1022
1023/// Return true if we should break up this subtract of X-Y into (X + -Y).
1025 // If this is a negation, we can't split it up!
1026 if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
1027 return false;
1028
1029 // Don't breakup X - undef.
1030 if (isa<UndefValue>(Sub->getOperand(1)))
1031 return false;
1032
1033 // Don't bother to break this up unless either the LHS is an associable add or
1034 // subtract or if this is only used by one.
1035 Value *V0 = Sub->getOperand(0);
1036 if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
1037 isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
1038 return true;
1039 Value *V1 = Sub->getOperand(1);
1040 if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
1041 isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
1042 return true;
1043 Value *VB = Sub->user_back();
1044 if (Sub->hasOneUse() &&
1045 (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
1046 isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
1047 return true;
1048
1049 return false;
1050}
1051
1052/// If we have (X-Y), and if either X is an add, or if this is only used by an
1053/// add, transform this into (X+(0-Y)) to promote better reassociation.
1056 // Convert a subtract into an add and a neg instruction. This allows sub
1057 // instructions to be commuted with other add instructions.
1058 //
1059 // Calculate the negative value of Operand 1 of the sub instruction,
1060 // and set it as the RHS of the add instruction we just made.
1061 Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1062 BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
1063 Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1064 Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1065 New->takeName(Sub);
1066
1067 // Everyone now refers to the add instruction.
1068 Sub->replaceAllUsesWith(New);
1069 New->setDebugLoc(Sub->getDebugLoc());
1070
1071 LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1072 return New;
1073}
1074
1075/// If this is a shift of a reassociable multiply or is used by one, change
1076/// this into a multiply by a constant to assist with further reassociation.
1078 Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1079 auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1080 MulCst = ConstantExpr::getShl(MulCst, SA);
1081
1083 BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
1084 Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
1085 Mul->takeName(Shl);
1086
1087 // Everyone now refers to the mul instruction.
1088 Shl->replaceAllUsesWith(Mul);
1089 Mul->setDebugLoc(Shl->getDebugLoc());
1090
1091 // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1092 // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1093 // handling. It can be preserved as long as we're not left shifting by
1094 // bitwidth - 1.
1095 bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1096 bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1097 unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
1098 if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1099 Mul->setHasNoSignedWrap(true);
1100 Mul->setHasNoUnsignedWrap(NUW);
1101 return Mul;
1102}
1103
1104/// Scan backwards and forwards among values with the same rank as element i
1105/// to see if X exists. If X does not exist, return i. This is useful when
1106/// scanning for 'x' when we see '-x' because they both get the same rank.
1108 unsigned i, Value *X) {
1109 unsigned XRank = Ops[i].Rank;
1110 unsigned e = Ops.size();
1111 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1112 if (Ops[j].Op == X)
1113 return j;
1114 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1115 if (Instruction *I2 = dyn_cast<Instruction>(X))
1116 if (I1->isIdenticalTo(I2))
1117 return j;
1118 }
1119 // Scan backwards.
1120 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1121 if (Ops[j].Op == X)
1122 return j;
1123 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1124 if (Instruction *I2 = dyn_cast<Instruction>(X))
1125 if (I1->isIdenticalTo(I2))
1126 return j;
1127 }
1128 return i;
1129}
1130
1131/// Emit a tree of add instructions, summing Ops together
1132/// and returning the result. Insert the tree before I.
1135 if (Ops.size() == 1) return Ops.back();
1136
1137 Value *V1 = Ops.pop_back_val();
1138 Value *V2 = EmitAddTreeOfValues(I, Ops);
1139 return CreateAdd(V2, V1, "reass.add", I, I);
1140}
1141
1142/// If V is an expression tree that is a multiplication sequence,
1143/// and if this sequence contains a multiply by Factor,
1144/// remove Factor from the tree and return the new tree.
1145Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
1146 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1147 if (!BO)
1148 return nullptr;
1149
1151 MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts);
1153 Factors.reserve(Tree.size());
1154 for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
1155 RepeatedValue E = Tree[i];
1156 Factors.append(E.second.getZExtValue(),
1157 ValueEntry(getRank(E.first), E.first));
1158 }
1159
1160 bool FoundFactor = false;
1161 bool NeedsNegate = false;
1162 for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1163 if (Factors[i].Op == Factor) {
1164 FoundFactor = true;
1165 Factors.erase(Factors.begin()+i);
1166 break;
1167 }
1168
1169 // If this is a negative version of this factor, remove it.
1170 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1171 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1172 if (FC1->getValue() == -FC2->getValue()) {
1173 FoundFactor = NeedsNegate = true;
1174 Factors.erase(Factors.begin()+i);
1175 break;
1176 }
1177 } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1178 if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
1179 const APFloat &F1 = FC1->getValueAPF();
1180 APFloat F2(FC2->getValueAPF());
1181 F2.changeSign();
1182 if (F1 == F2) {
1183 FoundFactor = NeedsNegate = true;
1184 Factors.erase(Factors.begin() + i);
1185 break;
1186 }
1187 }
1188 }
1189 }
1190
1191 if (!FoundFactor) {
1192 // Make sure to restore the operands to the expression tree.
1193 RewriteExprTree(BO, Factors);
1194 return nullptr;
1195 }
1196
1197 BasicBlock::iterator InsertPt = ++BO->getIterator();
1198
1199 // If this was just a single multiply, remove the multiply and return the only
1200 // remaining operand.
1201 if (Factors.size() == 1) {
1202 RedoInsts.insert(BO);
1203 V = Factors[0].Op;
1204 } else {
1205 RewriteExprTree(BO, Factors);
1206 V = BO;
1207 }
1208
1209 if (NeedsNegate)
1210 V = CreateNeg(V, "neg", &*InsertPt, BO);
1211
1212 return V;
1213}
1214
1215/// If V is a single-use multiply, recursively add its operands as factors,
1216/// otherwise add V to the list of factors.
1217///
1218/// Ops is the top-level list of add operands we're trying to factor.
1220 SmallVectorImpl<Value*> &Factors) {
1221 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1222 if (!BO) {
1223 Factors.push_back(V);
1224 return;
1225 }
1226
1227 // Otherwise, add the LHS and RHS to the list of factors.
1230}
1231
1232/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1233/// This optimizes based on identities. If it can be reduced to a single Value,
1234/// it is returned, otherwise the Ops list is mutated as necessary.
1235static Value *OptimizeAndOrXor(unsigned Opcode,
1237 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1238 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1239 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1240 // First, check for X and ~X in the operand list.
1241 assert(i < Ops.size());
1242 Value *X;
1243 if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1244 unsigned FoundX = FindInOperandList(Ops, i, X);
1245 if (FoundX != i) {
1246 if (Opcode == Instruction::And) // ...&X&~X = 0
1247 return Constant::getNullValue(X->getType());
1248
1249 if (Opcode == Instruction::Or) // ...|X|~X = -1
1250 return Constant::getAllOnesValue(X->getType());
1251 }
1252 }
1253
1254 // Next, check for duplicate pairs of values, which we assume are next to
1255 // each other, due to our sorting criteria.
1256 assert(i < Ops.size());
1257 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1258 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1259 // Drop duplicate values for And and Or.
1260 Ops.erase(Ops.begin()+i);
1261 --i; --e;
1262 ++NumAnnihil;
1263 continue;
1264 }
1265
1266 // Drop pairs of values for Xor.
1267 assert(Opcode == Instruction::Xor);
1268 if (e == 2)
1269 return Constant::getNullValue(Ops[0].Op->getType());
1270
1271 // Y ^ X^X -> Y
1272 Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1273 i -= 1; e -= 2;
1274 ++NumAnnihil;
1275 }
1276 }
1277 return nullptr;
1278}
1279
1280/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1281/// instruction with the given two operands, and return the resulting
1282/// instruction. There are two special cases: 1) if the constant operand is 0,
1283/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1284/// be returned.
1285static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
1286 const APInt &ConstOpnd) {
1287 if (ConstOpnd.isZero())
1288 return nullptr;
1289
1290 if (ConstOpnd.isAllOnes())
1291 return Opnd;
1292
1293 Instruction *I = BinaryOperator::CreateAnd(
1294 Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1295 InsertBefore);
1296 I->setDebugLoc(InsertBefore->getDebugLoc());
1297 return I;
1298}
1299
1300// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1301// into "R ^ C", where C would be 0, and R is a symbolic value.
1302//
1303// If it was successful, true is returned, and the "R" and "C" is returned
1304// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1305// and both "Res" and "ConstOpnd" remain unchanged.
1306bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1307 APInt &ConstOpnd, Value *&Res) {
1308 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1309 // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1310 // = (x & ~c1) ^ (c1 ^ c2)
1311 // It is useful only when c1 == c2.
1312 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1313 return false;
1314
1315 if (!Opnd1->getValue()->hasOneUse())
1316 return false;
1317
1318 const APInt &C1 = Opnd1->getConstPart();
1319 if (C1 != ConstOpnd)
1320 return false;
1321
1322 Value *X = Opnd1->getSymbolicPart();
1323 Res = createAndInstr(I, X, ~C1);
1324 // ConstOpnd was C2, now C1 ^ C2.
1325 ConstOpnd ^= C1;
1326
1327 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1328 RedoInsts.insert(T);
1329 return true;
1330}
1331
1332// Helper function of OptimizeXor(). It tries to simplify
1333// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1334// symbolic value.
1335//
1336// If it was successful, true is returned, and the "R" and "C" is returned
1337// via "Res" and "ConstOpnd", respectively (If the entire expression is
1338// evaluated to a constant, the Res is set to NULL); otherwise, false is
1339// returned, and both "Res" and "ConstOpnd" remain unchanged.
1340bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
1341 XorOpnd *Opnd2, APInt &ConstOpnd,
1342 Value *&Res) {
1343 Value *X = Opnd1->getSymbolicPart();
1344 if (X != Opnd2->getSymbolicPart())
1345 return false;
1346
1347 // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1348 int DeadInstNum = 1;
1349 if (Opnd1->getValue()->hasOneUse())
1350 DeadInstNum++;
1351 if (Opnd2->getValue()->hasOneUse())
1352 DeadInstNum++;
1353
1354 // Xor-Rule 2:
1355 // (x | c1) ^ (x & c2)
1356 // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1357 // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1358 // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1359 //
1360 if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1361 if (Opnd2->isOrExpr())
1362 std::swap(Opnd1, Opnd2);
1363
1364 const APInt &C1 = Opnd1->getConstPart();
1365 const APInt &C2 = Opnd2->getConstPart();
1366 APInt C3((~C1) ^ C2);
1367
1368 // Do not increase code size!
1369 if (!C3.isZero() && !C3.isAllOnes()) {
1370 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1371 if (NewInstNum > DeadInstNum)
1372 return false;
1373 }
1374
1375 Res = createAndInstr(I, X, C3);
1376 ConstOpnd ^= C1;
1377 } else if (Opnd1->isOrExpr()) {
1378 // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1379 //
1380 const APInt &C1 = Opnd1->getConstPart();
1381 const APInt &C2 = Opnd2->getConstPart();
1382 APInt C3 = C1 ^ C2;
1383
1384 // Do not increase code size
1385 if (!C3.isZero() && !C3.isAllOnes()) {
1386 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1387 if (NewInstNum > DeadInstNum)
1388 return false;
1389 }
1390
1391 Res = createAndInstr(I, X, C3);
1392 ConstOpnd ^= C3;
1393 } else {
1394 // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1395 //
1396 const APInt &C1 = Opnd1->getConstPart();
1397 const APInt &C2 = Opnd2->getConstPart();
1398 APInt C3 = C1 ^ C2;
1399 Res = createAndInstr(I, X, C3);
1400 }
1401
1402 // Put the original operands in the Redo list; hope they will be deleted
1403 // as dead code.
1404 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1405 RedoInsts.insert(T);
1406 if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1407 RedoInsts.insert(T);
1408
1409 return true;
1410}
1411
1412/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1413/// to a single Value, it is returned, otherwise the Ops list is mutated as
1414/// necessary.
1415Value *ReassociatePass::OptimizeXor(Instruction *I,
1417 if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1418 return V;
1419
1420 if (Ops.size() == 1)
1421 return nullptr;
1422
1424 SmallVector<XorOpnd*, 8> OpndPtrs;
1425 Type *Ty = Ops[0].Op->getType();
1426 APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1427
1428 // Step 1: Convert ValueEntry to XorOpnd
1429 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1430 Value *V = Ops[i].Op;
1431 const APInt *C;
1432 // TODO: Support non-splat vectors.
1433 if (match(V, m_APInt(C))) {
1434 ConstOpnd ^= *C;
1435 } else {
1436 XorOpnd O(V);
1437 O.setSymbolicRank(getRank(O.getSymbolicPart()));
1438 Opnds.push_back(O);
1439 }
1440 }
1441
1442 // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1443 // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1444 // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1445 // with the previous loop --- the iterator of the "Opnds" may be invalidated
1446 // when new elements are added to the vector.
1447 for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
1448 OpndPtrs.push_back(&Opnds[i]);
1449
1450 // Step 2: Sort the Xor-Operands in a way such that the operands containing
1451 // the same symbolic value cluster together. For instance, the input operand
1452 // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1453 // ("x | 123", "x & 789", "y & 456").
1454 //
1455 // The purpose is twofold:
1456 // 1) Cluster together the operands sharing the same symbolic-value.
1457 // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1458 // could potentially shorten crital path, and expose more loop-invariants.
1459 // Note that values' rank are basically defined in RPO order (FIXME).
1460 // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1461 // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1462 // "z" in the order of X-Y-Z is better than any other orders.
1463 llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1464 return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1465 });
1466
1467 // Step 3: Combine adjacent operands
1468 XorOpnd *PrevOpnd = nullptr;
1469 bool Changed = false;
1470 for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1471 XorOpnd *CurrOpnd = OpndPtrs[i];
1472 // The combined value
1473 Value *CV;
1474
1475 // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1476 if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
1477 Changed = true;
1478 if (CV)
1479 *CurrOpnd = XorOpnd(CV);
1480 else {
1481 CurrOpnd->Invalidate();
1482 continue;
1483 }
1484 }
1485
1486 if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1487 PrevOpnd = CurrOpnd;
1488 continue;
1489 }
1490
1491 // step 3.2: When previous and current operands share the same symbolic
1492 // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1493 if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1494 // Remove previous operand
1495 PrevOpnd->Invalidate();
1496 if (CV) {
1497 *CurrOpnd = XorOpnd(CV);
1498 PrevOpnd = CurrOpnd;
1499 } else {
1500 CurrOpnd->Invalidate();
1501 PrevOpnd = nullptr;
1502 }
1503 Changed = true;
1504 }
1505 }
1506
1507 // Step 4: Reassemble the Ops
1508 if (Changed) {
1509 Ops.clear();
1510 for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
1511 XorOpnd &O = Opnds[i];
1512 if (O.isInvalid())
1513 continue;
1514 ValueEntry VE(getRank(O.getValue()), O.getValue());
1515 Ops.push_back(VE);
1516 }
1517 if (!ConstOpnd.isZero()) {
1518 Value *C = ConstantInt::get(Ty, ConstOpnd);
1519 ValueEntry VE(getRank(C), C);
1520 Ops.push_back(VE);
1521 }
1522 unsigned Sz = Ops.size();
1523 if (Sz == 1)
1524 return Ops.back().Op;
1525 if (Sz == 0) {
1526 assert(ConstOpnd.isZero());
1527 return ConstantInt::get(Ty, ConstOpnd);
1528 }
1529 }
1530
1531 return nullptr;
1532}
1533
1534/// Optimize a series of operands to an 'add' instruction. This
1535/// optimizes based on identities. If it can be reduced to a single Value, it
1536/// is returned, otherwise the Ops list is mutated as necessary.
1537Value *ReassociatePass::OptimizeAdd(Instruction *I,
1539 // Scan the operand lists looking for X and -X pairs. If we find any, we
1540 // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1541 // scan for any
1542 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1543
1544 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1545 Value *TheOp = Ops[i].Op;
1546 // Check to see if we've seen this operand before. If so, we factor all
1547 // instances of the operand together. Due to our sorting criteria, we know
1548 // that these need to be next to each other in the vector.
1549 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1550 // Rescan the list, remove all instances of this operand from the expr.
1551 unsigned NumFound = 0;
1552 do {
1553 Ops.erase(Ops.begin()+i);
1554 ++NumFound;
1555 } while (i != Ops.size() && Ops[i].Op == TheOp);
1556
1557 LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1558 << '\n');
1559 ++NumFactor;
1560
1561 // Insert a new multiply.
1562 Type *Ty = TheOp->getType();
1563 Constant *C = Ty->isIntOrIntVectorTy() ?
1564 ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1565 Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
1566
1567 // Now that we have inserted a multiply, optimize it. This allows us to
1568 // handle cases that require multiple factoring steps, such as this:
1569 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1570 RedoInsts.insert(Mul);
1571
1572 // If every add operand was a duplicate, return the multiply.
1573 if (Ops.empty())
1574 return Mul;
1575
1576 // Otherwise, we had some input that didn't have the dupe, such as
1577 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1578 // things being added by this operation.
1579 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1580
1581 --i;
1582 e = Ops.size();
1583 continue;
1584 }
1585
1586 // Check for X and -X or X and ~X in the operand list.
1587 Value *X;
1588 if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1589 !match(TheOp, m_FNeg(m_Value(X))))
1590 continue;
1591
1592 unsigned FoundX = FindInOperandList(Ops, i, X);
1593 if (FoundX == i)
1594 continue;
1595
1596 // Remove X and -X from the operand list.
1597 if (Ops.size() == 2 &&
1598 (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1599 return Constant::getNullValue(X->getType());
1600
1601 // Remove X and ~X from the operand list.
1602 if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1603 return Constant::getAllOnesValue(X->getType());
1604
1605 Ops.erase(Ops.begin()+i);
1606 if (i < FoundX)
1607 --FoundX;
1608 else
1609 --i; // Need to back up an extra one.
1610 Ops.erase(Ops.begin()+FoundX);
1611 ++NumAnnihil;
1612 --i; // Revisit element.
1613 e -= 2; // Removed two elements.
1614
1615 // if X and ~X we append -1 to the operand list.
1616 if (match(TheOp, m_Not(m_Value()))) {
1617 Value *V = Constant::getAllOnesValue(X->getType());
1618 Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1619 e += 1;
1620 }
1621 }
1622
1623 // Scan the operand list, checking to see if there are any common factors
1624 // between operands. Consider something like A*A+A*B*C+D. We would like to
1625 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1626 // To efficiently find this, we count the number of times a factor occurs
1627 // for any ADD operands that are MULs.
1628 DenseMap<Value*, unsigned> FactorOccurrences;
1629
1630 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1631 // where they are actually the same multiply.
1632 unsigned MaxOcc = 0;
1633 Value *MaxOccVal = nullptr;
1634 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1635 BinaryOperator *BOp =
1636 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1637 if (!BOp)
1638 continue;
1639
1640 // Compute all of the factors of this added value.
1641 SmallVector<Value*, 8> Factors;
1642 FindSingleUseMultiplyFactors(BOp, Factors);
1643 assert(Factors.size() > 1 && "Bad linearize!");
1644
1645 // Add one to FactorOccurrences for each unique factor in this op.
1646 SmallPtrSet<Value*, 8> Duplicates;
1647 for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1648 Value *Factor = Factors[i];
1649 if (!Duplicates.insert(Factor).second)
1650 continue;
1651
1652 unsigned Occ = ++FactorOccurrences[Factor];
1653 if (Occ > MaxOcc) {
1654 MaxOcc = Occ;
1655 MaxOccVal = Factor;
1656 }
1657
1658 // If Factor is a negative constant, add the negated value as a factor
1659 // because we can percolate the negate out. Watch for minint, which
1660 // cannot be positivified.
1661 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1662 if (CI->isNegative() && !CI->isMinValue(true)) {
1663 Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1664 if (!Duplicates.insert(Factor).second)
1665 continue;
1666 unsigned Occ = ++FactorOccurrences[Factor];
1667 if (Occ > MaxOcc) {
1668 MaxOcc = Occ;
1669 MaxOccVal = Factor;
1670 }
1671 }
1672 } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1673 if (CF->isNegative()) {
1674 APFloat F(CF->getValueAPF());
1675 F.changeSign();
1676 Factor = ConstantFP::get(CF->getContext(), F);
1677 if (!Duplicates.insert(Factor).second)
1678 continue;
1679 unsigned Occ = ++FactorOccurrences[Factor];
1680 if (Occ > MaxOcc) {
1681 MaxOcc = Occ;
1682 MaxOccVal = Factor;
1683 }
1684 }
1685 }
1686 }
1687 }
1688
1689 // If any factor occurred more than one time, we can pull it out.
1690 if (MaxOcc > 1) {
1691 LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1692 << '\n');
1693 ++NumFactor;
1694
1695 // Create a new instruction that uses the MaxOccVal twice. If we don't do
1696 // this, we could otherwise run into situations where removing a factor
1697 // from an expression will drop a use of maxocc, and this can cause
1698 // RemoveFactorFromExpression on successive values to behave differently.
1699 Instruction *DummyInst =
1700 I->getType()->isIntOrIntVectorTy()
1701 ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1702 : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1703
1705 for (unsigned i = 0; i != Ops.size(); ++i) {
1706 // Only try to remove factors from expressions we're allowed to.
1707 BinaryOperator *BOp =
1708 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1709 if (!BOp)
1710 continue;
1711
1712 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1713 // The factorized operand may occur several times. Convert them all in
1714 // one fell swoop.
1715 for (unsigned j = Ops.size(); j != i;) {
1716 --j;
1717 if (Ops[j].Op == Ops[i].Op) {
1718 NewMulOps.push_back(V);
1719 Ops.erase(Ops.begin()+j);
1720 }
1721 }
1722 --i;
1723 }
1724 }
1725
1726 // No need for extra uses anymore.
1727 DummyInst->deleteValue();
1728
1729 unsigned NumAddedValues = NewMulOps.size();
1730 Value *V = EmitAddTreeOfValues(I, NewMulOps);
1731
1732 // Now that we have inserted the add tree, optimize it. This allows us to
1733 // handle cases that require multiple factoring steps, such as this:
1734 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1735 assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1736 (void)NumAddedValues;
1737 if (Instruction *VI = dyn_cast<Instruction>(V))
1738 RedoInsts.insert(VI);
1739
1740 // Create the multiply.
1741 Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I, I);
1742
1743 // Rerun associate on the multiply in case the inner expression turned into
1744 // a multiply. We want to make sure that we keep things in canonical form.
1745 RedoInsts.insert(V2);
1746
1747 // If every add operand included the factor (e.g. "A*B + A*C"), then the
1748 // entire result expression is just the multiply "A*(B+C)".
1749 if (Ops.empty())
1750 return V2;
1751
1752 // Otherwise, we had some input that didn't have the factor, such as
1753 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1754 // things being added by this operation.
1755 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1756 }
1757
1758 return nullptr;
1759}
1760
1761/// Build up a vector of value/power pairs factoring a product.
1762///
1763/// Given a series of multiplication operands, build a vector of factors and
1764/// the powers each is raised to when forming the final product. Sort them in
1765/// the order of descending power.
1766///
1767/// (x*x) -> [(x, 2)]
1768/// ((x*x)*x) -> [(x, 3)]
1769/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1770///
1771/// \returns Whether any factors have a power greater than one.
1773 SmallVectorImpl<Factor> &Factors) {
1774 // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1775 // Compute the sum of powers of simplifiable factors.
1776 unsigned FactorPowerSum = 0;
1777 for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1778 Value *Op = Ops[Idx-1].Op;
1779
1780 // Count the number of occurrences of this value.
1781 unsigned Count = 1;
1782 for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1783 ++Count;
1784 // Track for simplification all factors which occur 2 or more times.
1785 if (Count > 1)
1786 FactorPowerSum += Count;
1787 }
1788
1789 // We can only simplify factors if the sum of the powers of our simplifiable
1790 // factors is 4 or higher. When that is the case, we will *always* have
1791 // a simplification. This is an important invariant to prevent cyclicly
1792 // trying to simplify already minimal formations.
1793 if (FactorPowerSum < 4)
1794 return false;
1795
1796 // Now gather the simplifiable factors, removing them from Ops.
1797 FactorPowerSum = 0;
1798 for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1799 Value *Op = Ops[Idx-1].Op;
1800
1801 // Count the number of occurrences of this value.
1802 unsigned Count = 1;
1803 for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1804 ++Count;
1805 if (Count == 1)
1806 continue;
1807 // Move an even number of occurrences to Factors.
1808 Count &= ~1U;
1809 Idx -= Count;
1810 FactorPowerSum += Count;
1811 Factors.push_back(Factor(Op, Count));
1812 Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1813 }
1814
1815 // None of the adjustments above should have reduced the sum of factor powers
1816 // below our mininum of '4'.
1817 assert(FactorPowerSum >= 4);
1818
1819 llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
1820 return LHS.Power > RHS.Power;
1821 });
1822 return true;
1823}
1824
1825/// Build a tree of multiplies, computing the product of Ops.
1828 if (Ops.size() == 1)
1829 return Ops.back();
1830
1831 Value *LHS = Ops.pop_back_val();
1832 do {
1833 if (LHS->getType()->isIntOrIntVectorTy())
1834 LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1835 else
1836 LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1837 } while (!Ops.empty());
1838
1839 return LHS;
1840}
1841
1842/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1843///
1844/// Given a vector of values raised to various powers, where no two values are
1845/// equal and the powers are sorted in decreasing order, compute the minimal
1846/// DAG of multiplies to compute the final product, and return that product
1847/// value.
1848Value *
1849ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1850 SmallVectorImpl<Factor> &Factors) {
1851 assert(Factors[0].Power);
1852 SmallVector<Value *, 4> OuterProduct;
1853 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1854 Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1855 if (Factors[Idx].Power != Factors[LastIdx].Power) {
1856 LastIdx = Idx;
1857 continue;
1858 }
1859
1860 // We want to multiply across all the factors with the same power so that
1861 // we can raise them to that power as a single entity. Build a mini tree
1862 // for that.
1863 SmallVector<Value *, 4> InnerProduct;
1864 InnerProduct.push_back(Factors[LastIdx].Base);
1865 do {
1866 InnerProduct.push_back(Factors[Idx].Base);
1867 ++Idx;
1868 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1869
1870 // Reset the base value of the first factor to the new expression tree.
1871 // We'll remove all the factors with the same power in a second pass.
1872 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1873 if (Instruction *MI = dyn_cast<Instruction>(M))
1874 RedoInsts.insert(MI);
1875
1876 LastIdx = Idx;
1877 }
1878 // Unique factors with equal powers -- we've folded them into the first one's
1879 // base.
1880 Factors.erase(std::unique(Factors.begin(), Factors.end(),
1881 [](const Factor &LHS, const Factor &RHS) {
1882 return LHS.Power == RHS.Power;
1883 }),
1884 Factors.end());
1885
1886 // Iteratively collect the base of each factor with an add power into the
1887 // outer product, and halve each power in preparation for squaring the
1888 // expression.
1889 for (Factor &F : Factors) {
1890 if (F.Power & 1)
1891 OuterProduct.push_back(F.Base);
1892 F.Power >>= 1;
1893 }
1894 if (Factors[0].Power) {
1895 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1896 OuterProduct.push_back(SquareRoot);
1897 OuterProduct.push_back(SquareRoot);
1898 }
1899 if (OuterProduct.size() == 1)
1900 return OuterProduct.front();
1901
1902 Value *V = buildMultiplyTree(Builder, OuterProduct);
1903 return V;
1904}
1905
1906Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
1908 // We can only optimize the multiplies when there is a chain of more than
1909 // three, such that a balanced tree might require fewer total multiplies.
1910 if (Ops.size() < 4)
1911 return nullptr;
1912
1913 // Try to turn linear trees of multiplies without other uses of the
1914 // intermediate stages into minimal multiply DAGs with perfect sub-expression
1915 // re-use.
1916 SmallVector<Factor, 4> Factors;
1917 if (!collectMultiplyFactors(Ops, Factors))
1918 return nullptr; // All distinct factors, so nothing left for us to do.
1919
1921 // The reassociate transformation for FP operations is performed only
1922 // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1923 // to the newly generated operations.
1924 if (auto FPI = dyn_cast<FPMathOperator>(I))
1925 Builder.setFastMathFlags(FPI->getFastMathFlags());
1926
1927 Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1928 if (Ops.empty())
1929 return V;
1930
1931 ValueEntry NewEntry = ValueEntry(getRank(V), V);
1932 Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
1933 return nullptr;
1934}
1935
1936Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
1938 // Now that we have the linearized expression tree, try to optimize it.
1939 // Start by folding any constants that we found.
1940 const DataLayout &DL = I->getModule()->getDataLayout();
1941 Constant *Cst = nullptr;
1942 unsigned Opcode = I->getOpcode();
1943 while (!Ops.empty()) {
1944 if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
1945 if (!Cst) {
1946 Ops.pop_back();
1947 Cst = C;
1948 continue;
1949 }
1950 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
1951 Ops.pop_back();
1952 Cst = Res;
1953 continue;
1954 }
1955 }
1956 break;
1957 }
1958 // If there was nothing but constants then we are done.
1959 if (Ops.empty())
1960 return Cst;
1961
1962 // Put the combined constant back at the end of the operand list, except if
1963 // there is no point. For example, an add of 0 gets dropped here, while a
1964 // multiplication by zero turns the whole expression into zero.
1965 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
1966 if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
1967 return Cst;
1968 Ops.push_back(ValueEntry(0, Cst));
1969 }
1970
1971 if (Ops.size() == 1) return Ops[0].Op;
1972
1973 // Handle destructive annihilation due to identities between elements in the
1974 // argument list here.
1975 unsigned NumOps = Ops.size();
1976 switch (Opcode) {
1977 default: break;
1978 case Instruction::And:
1979 case Instruction::Or:
1980 if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
1981 return Result;
1982 break;
1983
1984 case Instruction::Xor:
1985 if (Value *Result = OptimizeXor(I, Ops))
1986 return Result;
1987 break;
1988
1989 case Instruction::Add:
1990 case Instruction::FAdd:
1991 if (Value *Result = OptimizeAdd(I, Ops))
1992 return Result;
1993 break;
1994
1995 case Instruction::Mul:
1996 case Instruction::FMul:
1997 if (Value *Result = OptimizeMul(I, Ops))
1998 return Result;
1999 break;
2000 }
2001
2002 if (Ops.size() != NumOps)
2003 return OptimizeExpression(I, Ops);
2004 return nullptr;
2005}
2006
2007// Remove dead instructions and if any operands are trivially dead add them to
2008// Insts so they will be removed as well.
2009void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
2010 OrderedSet &Insts) {
2011 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2012 SmallVector<Value *, 4> Ops(I->operands());
2013 ValueRankMap.erase(I);
2014 Insts.remove(I);
2015 RedoInsts.remove(I);
2017 I->eraseFromParent();
2018 for (auto *Op : Ops)
2019 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
2020 if (OpInst->use_empty())
2021 Insts.insert(OpInst);
2022}
2023
2024/// Zap the given instruction, adding interesting operands to the work list.
2025void ReassociatePass::EraseInst(Instruction *I) {
2026 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2027 LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
2028
2029 SmallVector<Value *, 8> Ops(I->operands());
2030 // Erase the dead instruction.
2031 ValueRankMap.erase(I);
2032 RedoInsts.remove(I);
2034 I->eraseFromParent();
2035 // Optimize its operands.
2036 SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
2037 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2038 if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
2039 // If this is a node in an expression tree, climb to the expression root
2040 // and add that since that's where optimization actually happens.
2041 unsigned Opcode = Op->getOpcode();
2042 while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
2043 Visited.insert(Op).second)
2044 Op = Op->user_back();
2045
2046 // The instruction we're going to push may be coming from a
2047 // dead block, and Reassociate skips the processing of unreachable
2048 // blocks because it's a waste of time and also because it can
2049 // lead to infinite loop due to LLVM's non-standard definition
2050 // of dominance.
2051 if (ValueRankMap.contains(Op))
2052 RedoInsts.insert(Op);
2053 }
2054
2055 MadeChange = true;
2056}
2057
2058/// Recursively analyze an expression to build a list of instructions that have
2059/// negative floating-point constant operands. The caller can then transform
2060/// the list to create positive constants for better reassociation and CSE.
2062 SmallVectorImpl<Instruction *> &Candidates) {
2063 // Handle only one-use instructions. Combining negations does not justify
2064 // replicating instructions.
2065 Instruction *I;
2066 if (!match(V, m_OneUse(m_Instruction(I))))
2067 return;
2068
2069 // Handle expressions of multiplications and divisions.
2070 // TODO: This could look through floating-point casts.
2071 const APFloat *C;
2072 switch (I->getOpcode()) {
2073 case Instruction::FMul:
2074 // Not expecting non-canonical code here. Bail out and wait.
2075 if (match(I->getOperand(0), m_Constant()))
2076 break;
2077
2078 if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2079 Candidates.push_back(I);
2080 LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2081 }
2082 getNegatibleInsts(I->getOperand(0), Candidates);
2083 getNegatibleInsts(I->getOperand(1), Candidates);
2084 break;
2085 case Instruction::FDiv:
2086 // Not expecting non-canonical code here. Bail out and wait.
2087 if (match(I->getOperand(0), m_Constant()) &&
2088 match(I->getOperand(1), m_Constant()))
2089 break;
2090
2091 if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2092 (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2093 Candidates.push_back(I);
2094 LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2095 }
2096 getNegatibleInsts(I->getOperand(0), Candidates);
2097 getNegatibleInsts(I->getOperand(1), Candidates);
2098 break;
2099 default:
2100 break;
2101 }
2102}
2103
2104/// Given an fadd/fsub with an operand that is a one-use instruction
2105/// (the fadd/fsub), try to change negative floating-point constants into
2106/// positive constants to increase potential for reassociation and CSE.
2107Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
2108 Instruction *Op,
2109 Value *OtherOp) {
2110 assert((I->getOpcode() == Instruction::FAdd ||
2111 I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2112
2113 // Collect instructions with negative FP constants from the subtree that ends
2114 // in Op.
2116 getNegatibleInsts(Op, Candidates);
2117 if (Candidates.empty())
2118 return nullptr;
2119
2120 // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
2121 // resulting subtract will be broken up later. This can get us into an
2122 // infinite loop during reassociation.
2123 bool IsFSub = I->getOpcode() == Instruction::FSub;
2124 bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
2125 if (NeedsSubtract && ShouldBreakUpSubtract(I))
2126 return nullptr;
2127
2128 for (Instruction *Negatible : Candidates) {
2129 const APFloat *C;
2130 if (match(Negatible->getOperand(0), m_APFloat(C))) {
2131 assert(!match(Negatible->getOperand(1), m_Constant()) &&
2132 "Expecting only 1 constant operand");
2133 assert(C->isNegative() && "Expected negative FP constant");
2134 Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
2135 MadeChange = true;
2136 }
2137 if (match(Negatible->getOperand(1), m_APFloat(C))) {
2138 assert(!match(Negatible->getOperand(0), m_Constant()) &&
2139 "Expecting only 1 constant operand");
2140 assert(C->isNegative() && "Expected negative FP constant");
2141 Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
2142 MadeChange = true;
2143 }
2144 }
2145 assert(MadeChange == true && "Negative constant candidate was not changed");
2146
2147 // Negations cancelled out.
2148 if (Candidates.size() % 2 == 0)
2149 return I;
2150
2151 // Negate the final operand in the expression by flipping the opcode of this
2152 // fadd/fsub.
2153 assert(Candidates.size() % 2 == 1 && "Expected odd number");
2155 Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
2156 : Builder.CreateFSubFMF(OtherOp, Op, I);
2157 I->replaceAllUsesWith(NewInst);
2158 RedoInsts.insert(I);
2159 return dyn_cast<Instruction>(NewInst);
2160}
2161
2162/// Canonicalize expressions that contain a negative floating-point constant
2163/// of the following form:
2164/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2165/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2166/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2167///
2168/// The fadd/fsub opcode may be switched to allow folding a negation into the
2169/// input instruction.
2170Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
2171 LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
2172 Value *X;
2173 Instruction *Op;
2175 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2176 I = R;
2178 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2179 I = R;
2181 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2182 I = R;
2183 return I;
2184}
2185
2186/// Inspect and optimize the given instruction. Note that erasing
2187/// instructions is not allowed.
2188void ReassociatePass::OptimizeInst(Instruction *I) {
2189 // Only consider operations that we understand.
2190 if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
2191 return;
2192
2193 if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
2194 // If an operand of this shift is a reassociable multiply, or if the shift
2195 // is used by a reassociable multiply or add, turn into a multiply.
2196 if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
2197 (I->hasOneUse() &&
2198 (isReassociableOp(I->user_back(), Instruction::Mul) ||
2199 isReassociableOp(I->user_back(), Instruction::Add)))) {
2201 RedoInsts.insert(I);
2202 MadeChange = true;
2203 I = NI;
2204 }
2205
2206 // Commute binary operators, to canonicalize the order of their operands.
2207 // This can potentially expose more CSE opportunities, and makes writing other
2208 // transformations simpler.
2209 if (I->isCommutative())
2210 canonicalizeOperands(I);
2211
2212 // Canonicalize negative constants out of expressions.
2213 if (Instruction *Res = canonicalizeNegFPConstants(I))
2214 I = Res;
2215
2216 // Don't optimize floating-point instructions unless they have the
2217 // appropriate FastMathFlags for reassociation enabled.
2218 if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
2219 return;
2220
2221 // Do not reassociate boolean (i1) expressions. We want to preserve the
2222 // original order of evaluation for short-circuited comparisons that
2223 // SimplifyCFG has folded to AND/OR expressions. If the expression
2224 // is not further optimized, it is likely to be transformed back to a
2225 // short-circuited form for code gen, and the source order may have been
2226 // optimized for the most likely conditions.
2227 if (I->getType()->isIntegerTy(1))
2228 return;
2229
2230 // If this is a bitwise or instruction of operands
2231 // with no common bits set, convert it to X+Y.
2232 if (I->getOpcode() == Instruction::Or &&
2234 haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
2235 I->getModule()->getDataLayout(), /*AC=*/nullptr, I,
2236 /*DT=*/nullptr)) {
2238 RedoInsts.insert(I);
2239 MadeChange = true;
2240 I = NI;
2241 }
2242
2243 // If this is a subtract instruction which is not already in negate form,
2244 // see if we can convert it to X+-Y.
2245 if (I->getOpcode() == Instruction::Sub) {
2246 if (ShouldBreakUpSubtract(I)) {
2247 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2248 RedoInsts.insert(I);
2249 MadeChange = true;
2250 I = NI;
2251 } else if (match(I, m_Neg(m_Value()))) {
2252 // Otherwise, this is a negation. See if the operand is a multiply tree
2253 // and if this is not an inner node of a multiply tree.
2254 if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2255 (!I->hasOneUse() ||
2256 !isReassociableOp(I->user_back(), Instruction::Mul))) {
2258 // If the negate was simplified, revisit the users to see if we can
2259 // reassociate further.
2260 for (User *U : NI->users()) {
2261 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2262 RedoInsts.insert(Tmp);
2263 }
2264 RedoInsts.insert(I);
2265 MadeChange = true;
2266 I = NI;
2267 }
2268 }
2269 } else if (I->getOpcode() == Instruction::FNeg ||
2270 I->getOpcode() == Instruction::FSub) {
2271 if (ShouldBreakUpSubtract(I)) {
2272 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2273 RedoInsts.insert(I);
2274 MadeChange = true;
2275 I = NI;
2276 } else if (match(I, m_FNeg(m_Value()))) {
2277 // Otherwise, this is a negation. See if the operand is a multiply tree
2278 // and if this is not an inner node of a multiply tree.
2279 Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
2280 I->getOperand(0);
2281 if (isReassociableOp(Op, Instruction::FMul) &&
2282 (!I->hasOneUse() ||
2283 !isReassociableOp(I->user_back(), Instruction::FMul))) {
2284 // If the negate was simplified, revisit the users to see if we can
2285 // reassociate further.
2287 for (User *U : NI->users()) {
2288 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2289 RedoInsts.insert(Tmp);
2290 }
2291 RedoInsts.insert(I);
2292 MadeChange = true;
2293 I = NI;
2294 }
2295 }
2296 }
2297
2298 // If this instruction is an associative binary operator, process it.
2299 if (!I->isAssociative()) return;
2300 BinaryOperator *BO = cast<BinaryOperator>(I);
2301
2302 // If this is an interior node of a reassociable tree, ignore it until we
2303 // get to the root of the tree, to avoid N^2 analysis.
2304 unsigned Opcode = BO->getOpcode();
2305 if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2306 // During the initial run we will get to the root of the tree.
2307 // But if we get here while we are redoing instructions, there is no
2308 // guarantee that the root will be visited. So Redo later
2309 if (BO->user_back() != BO &&
2310 BO->getParent() == BO->user_back()->getParent())
2311 RedoInsts.insert(BO->user_back());
2312 return;
2313 }
2314
2315 // If this is an add tree that is used by a sub instruction, ignore it
2316 // until we process the subtract.
2317 if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2318 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2319 return;
2320 if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2321 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2322 return;
2323
2324 ReassociateExpression(BO);
2325}
2326
2327void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2328 // First, walk the expression tree, linearizing the tree, collecting the
2329 // operand information.
2331 MadeChange |= LinearizeExprTree(I, Tree, RedoInsts);
2333 Ops.reserve(Tree.size());
2334 for (const RepeatedValue &E : Tree)
2335 Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first));
2336
2337 LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2338
2339 // Now that we have linearized the tree to a list and have gathered all of
2340 // the operands and their ranks, sort the operands by their rank. Use a
2341 // stable_sort so that values with equal ranks will have their relative
2342 // positions maintained (and so the compiler is deterministic). Note that
2343 // this sorts so that the highest ranking values end up at the beginning of
2344 // the vector.
2345 llvm::stable_sort(Ops);
2346
2347 // Now that we have the expression tree in a convenient
2348 // sorted form, optimize it globally if possible.
2349 if (Value *V = OptimizeExpression(I, Ops)) {
2350 if (V == I)
2351 // Self-referential expression in unreachable code.
2352 return;
2353 // This expression tree simplified to something that isn't a tree,
2354 // eliminate it.
2355 LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
2356 I->replaceAllUsesWith(V);
2357 if (Instruction *VI = dyn_cast<Instruction>(V))
2358 if (I->getDebugLoc())
2359 VI->setDebugLoc(I->getDebugLoc());
2360 RedoInsts.insert(I);
2361 ++NumAnnihil;
2362 return;
2363 }
2364
2365 // We want to sink immediates as deeply as possible except in the case where
2366 // this is a multiply tree used only by an add, and the immediate is a -1.
2367 // In this case we reassociate to put the negation on the outside so that we
2368 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2369 if (I->hasOneUse()) {
2370 if (I->getOpcode() == Instruction::Mul &&
2371 cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2372 isa<ConstantInt>(Ops.back().Op) &&
2373 cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
2374 ValueEntry Tmp = Ops.pop_back_val();
2375 Ops.insert(Ops.begin(), Tmp);
2376 } else if (I->getOpcode() == Instruction::FMul &&
2377 cast<Instruction>(I->user_back())->getOpcode() ==
2378 Instruction::FAdd &&
2379 isa<ConstantFP>(Ops.back().Op) &&
2380 cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
2381 ValueEntry Tmp = Ops.pop_back_val();
2382 Ops.insert(Ops.begin(), Tmp);
2383 }
2384 }
2385
2386 LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2387
2388 if (Ops.size() == 1) {
2389 if (Ops[0].Op == I)
2390 // Self-referential expression in unreachable code.
2391 return;
2392
2393 // This expression tree simplified to something that isn't a tree,
2394 // eliminate it.
2395 I->replaceAllUsesWith(Ops[0].Op);
2396 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2397 OI->setDebugLoc(I->getDebugLoc());
2398 RedoInsts.insert(I);
2399 return;
2400 }
2401
2402 if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2403 // Find the pair with the highest count in the pairmap and move it to the
2404 // back of the list so that it can later be CSE'd.
2405 // example:
2406 // a*b*c*d*e
2407 // if c*e is the most "popular" pair, we can express this as
2408 // (((c*e)*d)*b)*a
2409 unsigned Max = 1;
2410 unsigned BestRank = 0;
2411 std::pair<unsigned, unsigned> BestPair;
2412 unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2413 for (unsigned i = 0; i < Ops.size() - 1; ++i)
2414 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2415 unsigned Score = 0;
2416 Value *Op0 = Ops[i].Op;
2417 Value *Op1 = Ops[j].Op;
2418 if (std::less<Value *>()(Op1, Op0))
2419 std::swap(Op0, Op1);
2420 auto it = PairMap[Idx].find({Op0, Op1});
2421 if (it != PairMap[Idx].end()) {
2422 // Functions like BreakUpSubtract() can erase the Values we're using
2423 // as keys and create new Values after we built the PairMap. There's a
2424 // small chance that the new nodes can have the same address as
2425 // something already in the table. We shouldn't accumulate the stored
2426 // score in that case as it refers to the wrong Value.
2427 if (it->second.isValid())
2428 Score += it->second.Score;
2429 }
2430
2431 unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
2432 if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2433 BestPair = {i, j};
2434 Max = Score;
2435 BestRank = MaxRank;
2436 }
2437 }
2438 if (Max > 1) {
2439 auto Op0 = Ops[BestPair.first];
2440 auto Op1 = Ops[BestPair.second];
2441 Ops.erase(&Ops[BestPair.second]);
2442 Ops.erase(&Ops[BestPair.first]);
2443 Ops.push_back(Op0);
2444 Ops.push_back(Op1);
2445 }
2446 }
2447 // Now that we ordered and optimized the expressions, splat them back into
2448 // the expression tree, removing any unneeded nodes.
2449 RewriteExprTree(I, Ops);
2450}
2451
2452void
2453ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2454 // Make a "pairmap" of how often each operand pair occurs.
2455 for (BasicBlock *BI : RPOT) {
2456 for (Instruction &I : *BI) {
2457 if (!I.isAssociative())
2458 continue;
2459
2460 // Ignore nodes that aren't at the root of trees.
2461 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2462 continue;
2463
2464 // Collect all operands in a single reassociable expression.
2465 // Since Reassociate has already been run once, we can assume things
2466 // are already canonical according to Reassociation's regime.
2467 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
2469 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2470 Value *Op = Worklist.pop_back_val();
2471 Instruction *OpI = dyn_cast<Instruction>(Op);
2472 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2473 Ops.push_back(Op);
2474 continue;
2475 }
2476 // Be paranoid about self-referencing expressions in unreachable code.
2477 if (OpI->getOperand(0) != OpI)
2478 Worklist.push_back(OpI->getOperand(0));
2479 if (OpI->getOperand(1) != OpI)
2480 Worklist.push_back(OpI->getOperand(1));
2481 }
2482 // Skip extremely long expressions.
2483 if (Ops.size() > GlobalReassociateLimit)
2484 continue;
2485
2486 // Add all pairwise combinations of operands to the pair map.
2487 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2489 for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2490 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2491 // Canonicalize operand orderings.
2492 Value *Op0 = Ops[i];
2493 Value *Op1 = Ops[j];
2494 if (std::less<Value *>()(Op1, Op0))
2495 std::swap(Op0, Op1);
2496 if (!Visited.insert({Op0, Op1}).second)
2497 continue;
2498 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2499 if (!res.second) {
2500 // If either key value has been erased then we've got the same
2501 // address by coincidence. That can't happen here because nothing is
2502 // erasing values but it can happen by the time we're querying the
2503 // map.
2504 assert(res.first->second.isValid() && "WeakVH invalidated");
2505 ++res.first->second.Score;
2506 }
2507 }
2508 }
2509 }
2510 }
2511}
2512
2514 // Get the functions basic blocks in Reverse Post Order. This order is used by
2515 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2516 // blocks (it has been seen that the analysis in this pass could hang when
2517 // analysing dead basic blocks).
2519
2520 // Calculate the rank map for F.
2521 BuildRankMap(F, RPOT);
2522
2523 // Build the pair map before running reassociate.
2524 // Technically this would be more accurate if we did it after one round
2525 // of reassociation, but in practice it doesn't seem to help much on
2526 // real-world code, so don't waste the compile time running reassociate
2527 // twice.
2528 // If a user wants, they could expicitly run reassociate twice in their
2529 // pass pipeline for further potential gains.
2530 // It might also be possible to update the pair map during runtime, but the
2531 // overhead of that may be large if there's many reassociable chains.
2532 BuildPairMap(RPOT);
2533
2534 MadeChange = false;
2535
2536 // Traverse the same blocks that were analysed by BuildRankMap.
2537 for (BasicBlock *BI : RPOT) {
2538 assert(RankMap.count(&*BI) && "BB should be ranked.");
2539 // Optimize every instruction in the basic block.
2540 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2541 if (isInstructionTriviallyDead(&*II)) {
2542 EraseInst(&*II++);
2543 } else {
2544 OptimizeInst(&*II);
2545 assert(II->getParent() == &*BI && "Moved to a different block!");
2546 ++II;
2547 }
2548
2549 // Make a copy of all the instructions to be redone so we can remove dead
2550 // instructions.
2551 OrderedSet ToRedo(RedoInsts);
2552 // Iterate over all instructions to be reevaluated and remove trivially dead
2553 // instructions. If any operand of the trivially dead instruction becomes
2554 // dead mark it for deletion as well. Continue this process until all
2555 // trivially dead instructions have been removed.
2556 while (!ToRedo.empty()) {
2557 Instruction *I = ToRedo.pop_back_val();
2559 RecursivelyEraseDeadInsts(I, ToRedo);
2560 MadeChange = true;
2561 }
2562 }
2563
2564 // Now that we have removed dead instructions, we can reoptimize the
2565 // remaining instructions.
2566 while (!RedoInsts.empty()) {
2567 Instruction *I = RedoInsts.front();
2568 RedoInsts.erase(RedoInsts.begin());
2570 EraseInst(I);
2571 else
2572 OptimizeInst(I);
2573 }
2574 }
2575
2576 // We are done with the rank map and pair map.
2577 RankMap.clear();
2578 ValueRankMap.clear();
2579 for (auto &Entry : PairMap)
2580 Entry.clear();
2581
2582 if (MadeChange) {
2585 return PA;
2586 }
2587
2588 return PreservedAnalyses::all();
2589}
2590
2591namespace {
2592
2593 class ReassociateLegacyPass : public FunctionPass {
2594 ReassociatePass Impl;
2595
2596 public:
2597 static char ID; // Pass identification, replacement for typeid
2598
2599 ReassociateLegacyPass() : FunctionPass(ID) {
2601 }
2602
2603 bool runOnFunction(Function &F) override {
2604 if (skipFunction(F))
2605 return false;
2606
2607 FunctionAnalysisManager DummyFAM;
2608 auto PA = Impl.run(F, DummyFAM);
2609 return !PA.areAllPreserved();
2610 }
2611
2612 void getAnalysisUsage(AnalysisUsage &AU) const override {
2613 AU.setPreservesCFG();
2617 }
2618 };
2619
2620} // end anonymous namespace
2621
2622char ReassociateLegacyPass::ID = 0;
2623
2624INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2625 "Reassociate expressions", false, false)
2626
2627// Public interface to the Reassociate pass
2629 return new ReassociateLegacyPass();
2630}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
assume Assume Builder
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition: IVUsers.cpp:56
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
nary reassociate
#define P(N)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:75
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
static unsigned CarmichaelShift(unsigned Bitwidth)
Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael function.
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
static Instruction * CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
static BinaryOperator * isReassociableOp(Value *V, unsigned Opcode)
Return true if V is an instruction of the specified opcode and if it only has one use.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode)
Add the extra weight 'RHS' to the existing weight 'LHS', reducing the combined weight using any speci...
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops, ReassociatePass::OrderedSet &ToRedo)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
std::pair< Value *, APInt > RepeatedValue
static bool hasFPAssociativeFlags(Instruction *I)
Return true if I is an instruction with the FastMathFlags that are needed for general reassociation s...
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
static Value * createAndInstr(Instruction *InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
static bool isLoadCombineCandidate(Instruction *Or)
static Value * EmitAddTreeOfValues(Instruction *I, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
@ Flags
Definition: TextStubV5.cpp:93
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:75
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:354
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:366
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:409
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:459
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:222
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition: InstrTypes.h:391
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
static Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2746
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2655
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2701
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2609
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:256
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:935
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:403
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
const BasicBlock & getEntryBlock() const
Definition: Function.h:740
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2558
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:358
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const BasicBlock * getParent() const
Definition: Instruction.h:90
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:87
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
bool isNilpotent() const
Return true if the instruction is nilpotent:
Definition: Instruction.h:610
const char * getOpcodeName() const
Definition: Instruction.h:170
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:168
Instruction * getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:355
bool isIdempotent() const
Return true if the instruction is idempotent:
Definition: Instruction.h:596
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1758
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: PassManager.h:327
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
Reassociate commutative expressions.
Definition: Reassociate.h:71
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:177
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void reserve(size_type N)
Definition: SmallVector.h:667
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:809
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:237
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:163
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1739
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
iterator_range< user_iterator > users()
Definition: Value.h:421
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:544
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:109
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
self_iterator getIterator()
Definition: ilist_node.h:82
Utility class representing a non-constant Xor-operand.
Definition: Reassociate.cpp:95
Value * getSymbolicPart() const
unsigned getSymbolicRank() const
void setSymbolicRank(unsigned R)
const APInt & getConstPart() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:997
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:716
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:985
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:295
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
@ Undef
Value of the register doesn't matter.
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2026
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1367
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1335
FunctionPass * createReassociatePass()
void initializeReassociateLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1789
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:614
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2001
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:59