LLVM 23.0.0git
Reassociate.cpp
Go to the documentation of this file.
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass reassociates commutative expressions in an order that is designed
10// to promote better constant propagation, GCSE, LICM, PRE, etc.
11//
12// For example: 4 + (x + 5) -> x + (4 + 5)
13//
14// In the implementation of this algorithm, constants are assigned rank = 0,
15// function arguments are rank = 1, and other values are assigned ranks
16// corresponding to the reverse post order traversal of current function
17// (starting at 2), which effectively gives values in deep loops higher rank
18// than values not in loops.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Operator.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/IR/ValueHandle.h"
53#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <utility>
63
64using namespace llvm;
65using namespace reassociate;
66using namespace PatternMatch;
67
68#define DEBUG_TYPE "reassociate"
69
70STATISTIC(NumChanged, "Number of insts reassociated");
71STATISTIC(NumAnnihil, "Number of expr tree annihilated");
72STATISTIC(NumFactor , "Number of multiplies factored");
73
74static cl::opt<bool>
75 UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
76 cl::desc("Only reorder expressions within a basic block "
77 "when exposing CSE opportunities"),
78 cl::init(true), cl::Hidden);
79
80#ifndef NDEBUG
81/// Print out the expression identified in the Ops list.
83 Module *M = I->getModule();
84 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
85 << *Ops[0].Op->getType() << '\t';
86 for (const ValueEntry &Op : Ops) {
87 dbgs() << "[ ";
88 Op.Op->printAsOperand(dbgs(), false, M);
89 dbgs() << ", #" << Op.Rank << "] ";
90 }
91}
92#endif
93
94/// Utility class representing a non-constant Xor-operand. We classify
95/// non-constant Xor-Operands into two categories:
96/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
97/// C2)
98/// C2.1) The operand is in the form of "X | C", where C is a non-zero
99/// constant.
100/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
101/// operand as "E | 0"
103public:
104 XorOpnd(Value *V);
105
106 bool isInvalid() const { return SymbolicPart == nullptr; }
107 bool isOrExpr() const { return isOr; }
108 Value *getValue() const { return OrigVal; }
109 Value *getSymbolicPart() const { return SymbolicPart; }
110 unsigned getSymbolicRank() const { return SymbolicRank; }
111 const APInt &getConstPart() const { return ConstPart; }
112
113 void Invalidate() { SymbolicPart = OrigVal = nullptr; }
114 void setSymbolicRank(unsigned R) { SymbolicRank = R; }
115
116private:
117 Value *OrigVal;
118 Value *SymbolicPart;
119 APInt ConstPart;
120 unsigned SymbolicRank;
121 bool isOr;
122};
123
125 assert(!isa<ConstantInt>(V) && "No ConstantInt");
126 OrigVal = V;
128 SymbolicRank = 0;
129
130 if (I && (I->getOpcode() == Instruction::Or ||
131 I->getOpcode() == Instruction::And)) {
132 Value *V0 = I->getOperand(0);
133 Value *V1 = I->getOperand(1);
134 const APInt *C;
135 if (match(V0, m_APInt(C)))
136 std::swap(V0, V1);
137
138 if (match(V1, m_APInt(C))) {
139 ConstPart = *C;
140 SymbolicPart = V0;
141 isOr = (I->getOpcode() == Instruction::Or);
142 return;
143 }
144 }
145
146 // view the operand as "V | 0"
147 SymbolicPart = V;
148 ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
149 isOr = true;
150}
151
152/// Return true if I is an instruction with the FastMathFlags that are needed
153/// for general reassociation set. This is not the same as testing
154/// Instruction::isAssociative() because it includes operations like fsub.
155/// (This routine is only intended to be called for floating-point operations.)
157 assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
158 return I->hasAllowReassoc() && I->hasNoSignedZeros();
159}
160
161/// Return true if V is an instruction of the specified opcode and if it
162/// only has one use.
163static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
164 auto *BO = dyn_cast<BinaryOperator>(V);
165 if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
167 return BO;
168 return nullptr;
169}
170
171static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
172 unsigned Opcode2) {
173 auto *BO = dyn_cast<BinaryOperator>(V);
174 if (BO && BO->hasOneUse() &&
175 (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
177 return BO;
178 return nullptr;
179}
180
181void ReassociatePass::BuildRankMap(Function &F,
182 ReversePostOrderTraversal<Function*> &RPOT) {
183 unsigned Rank = 2;
184
185 // Assign distinct ranks to function arguments.
186 for (auto &Arg : F.args()) {
187 ValueRankMap[&Arg] = ++Rank;
188 LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
189 << "\n");
190 }
191
192 // Traverse basic blocks in ReversePostOrder.
193 for (BasicBlock *BB : RPOT) {
194 unsigned BBRank = RankMap[BB] = ++Rank << 16;
195
196 // Walk the basic block, adding precomputed ranks for any instructions that
197 // we cannot move. This ensures that the ranks for these instructions are
198 // all different in the block.
199 for (Instruction &I : *BB)
201 ValueRankMap[&I] = ++BBRank;
202 }
203}
204
205unsigned ReassociatePass::getRank(Value *V) {
207 if (!I) {
208 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
209 return 0; // Otherwise it's a global or constant, rank 0.
210 }
211
212 if (unsigned Rank = ValueRankMap[I])
213 return Rank; // Rank already known?
214
215 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
216 // we can reassociate expressions for code motion! Since we do not recurse
217 // for PHI nodes, we cannot have infinite recursion here, because there
218 // cannot be loops in the value graph that do not go through PHI nodes.
219 unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
220 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
221 Rank = std::max(Rank, getRank(I->getOperand(i)));
222
223 // If this is a 'not' or 'neg' instruction, do not count it for rank. This
224 // assures us that X and ~X will have the same rank.
225 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
226 !match(I, m_FNeg(m_Value())))
227 ++Rank;
228
229 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
230 << "\n");
231
232 return ValueRankMap[I] = Rank;
233}
234
235// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
236void ReassociatePass::canonicalizeOperands(Instruction *I) {
237 assert(isa<BinaryOperator>(I) && "Expected binary operator.");
238 assert(I->isCommutative() && "Expected commutative operator.");
239
240 Value *LHS = I->getOperand(0);
241 Value *RHS = I->getOperand(1);
242 if (LHS == RHS || isa<Constant>(RHS))
243 return;
244 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS)) {
245 cast<BinaryOperator>(I)->swapOperands();
246 MadeChange = true;
247 }
248}
249
250static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
251 BasicBlock::iterator InsertBefore,
252 Value *FlagsOp) {
253 if (S1->getType()->isIntOrIntVectorTy())
254 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
255 else {
256 BinaryOperator *Res =
257 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
258 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
259 return Res;
260 }
261}
262
263static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
264 BasicBlock::iterator InsertBefore,
265 Value *FlagsOp) {
266 if (S1->getType()->isIntOrIntVectorTy())
267 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
268 else {
269 BinaryOperator *Res =
270 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
271 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
272 return Res;
273 }
274}
275
276static Instruction *CreateNeg(Value *S1, const Twine &Name,
277 BasicBlock::iterator InsertBefore,
278 Value *FlagsOp) {
279 if (S1->getType()->isIntOrIntVectorTy())
280 return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
281
282 if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
283 return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
284
285 return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
286}
287
288/// Replace 0-X with X*-1.
291 "Expected a Negate!");
292 // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
293 unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
294 Type *Ty = Neg->getType();
295 Constant *NegOne = Ty->isIntOrIntVectorTy() ?
296 ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
297
298 BinaryOperator *Res =
299 CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg->getIterator(), Neg);
300 Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
301 Res->takeName(Neg);
302 Neg->replaceAllUsesWith(Res);
303 Res->setDebugLoc(Neg->getDebugLoc());
304 return Res;
305}
306
307using RepeatedValue = std::pair<Value *, uint64_t>;
308
309/// Given an associative binary expression, return the leaf
310/// nodes in Ops along with their weights (how many times the leaf occurs). The
311/// original expression is the same as
312/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
313/// op
314/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
315/// op
316/// ...
317/// op
318/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
319///
320/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
321///
322/// This routine may modify the function, in which case it returns 'true'. The
323/// changes it makes may well be destructive, changing the value computed by 'I'
324/// to something completely different. Thus if the routine returns 'true' then
325/// you MUST either replace I with a new expression computed from the Ops array,
326/// or use RewriteExprTree to put the values back in.
327///
328/// A leaf node is either not a binary operation of the same kind as the root
329/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
330/// opcode), or is the same kind of binary operator but has a use which either
331/// does not belong to the expression, or does belong to the expression but is
332/// a leaf node. Every leaf node has at least one use that is a non-leaf node
333/// of the expression, while for non-leaf nodes (except for the root 'I') every
334/// use is a non-leaf node of the expression.
335///
336/// For example:
337/// expression graph node names
338///
339/// + | I
340/// / \ |
341/// + + | A, B
342/// / \ / \ |
343/// * + * | C, D, E
344/// / \ / \ / \ |
345/// + * | F, G
346///
347/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
348/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
349///
350/// The expression is maximal: if some instruction is a binary operator of the
351/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
352/// then the instruction also belongs to the expression, is not a leaf node of
353/// it, and its operands also belong to the expression (but may be leaf nodes).
354///
355/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
356/// order to ensure that every non-root node in the expression has *exactly one*
357/// use by a non-leaf node of the expression. This destruction means that the
358/// caller MUST either replace 'I' with a new expression or use something like
359/// RewriteExprTree to put the values back in if the routine indicates that it
360/// made a change by returning 'true'.
361///
362/// In the above example either the right operand of A or the left operand of B
363/// will be replaced by undef. If it is B's operand then this gives:
364///
365/// + | I
366/// / \ |
367/// + + | A, B - operand of B replaced with undef
368/// / \ \ |
369/// * + * | C, D, E
370/// / \ / \ / \ |
371/// + * | F, G
372///
373/// Note that such undef operands can only be reached by passing through 'I'.
374/// For example, if you visit operands recursively starting from a leaf node
375/// then you will never see such an undef operand unless you get back to 'I',
376/// which requires passing through a phi node.
377///
378/// Note that this routine may also mutate binary operators of the wrong type
379/// that have all uses inside the expression (i.e. only used by non-leaf nodes
380/// of the expression) if it can turn them into binary operators of the right
381/// type and thus make the expression bigger.
385 OverflowTracking &Flags) {
387 "Expected a UnaryOperator or BinaryOperator!");
388 LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
389 unsigned Opcode = I->getOpcode();
390 assert(I->isAssociative() && I->isCommutative() &&
391 "Expected an associative and commutative operation!");
392
393 // Visit all operands of the expression, keeping track of their weight (the
394 // number of paths from the expression root to the operand, or if you like
395 // the number of times that operand occurs in the linearized expression).
396 // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
397 // while A has weight two.
398
399 // Worklist of non-leaf nodes (their operands are in the expression too) along
400 // with their weights, representing a certain number of paths to the operator.
401 // If an operator occurs in the worklist multiple times then we found multiple
402 // ways to get to it.
403 SmallVector<std::pair<Instruction *, uint64_t>, 8> Worklist; // (Op, Weight)
404 Worklist.push_back(std::make_pair(I, 1));
405 bool Changed = false;
406
407 // Leaves of the expression are values that either aren't the right kind of
408 // operation (eg: a constant, or a multiply in an add tree), or are, but have
409 // some uses that are not inside the expression. For example, in I = X + X,
410 // X = A + B, the value X has two uses (by I) that are in the expression. If
411 // X has any other uses, for example in a return instruction, then we consider
412 // X to be a leaf, and won't analyze it further. When we first visit a value,
413 // if it has more than one use then at first we conservatively consider it to
414 // be a leaf. Later, as the expression is explored, we may discover some more
415 // uses of the value from inside the expression. If all uses turn out to be
416 // from within the expression (and the value is a binary operator of the right
417 // kind) then the value is no longer considered to be a leaf, and its operands
418 // are explored.
419
420 // Leaves - Keeps track of the set of putative leaves as well as the number of
421 // paths to each leaf seen so far.
422 using LeafMap = DenseMap<Value *, uint64_t>;
423 LeafMap Leaves; // Leaf -> Total weight so far.
424 SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
425 const DataLayout &DL = I->getDataLayout();
426
427#ifndef NDEBUG
428 SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
429#endif
430 while (!Worklist.empty()) {
431 // We examine the operands of this binary operator.
432 auto [I, Weight] = Worklist.pop_back_val();
433
434 Flags.mergeFlags(*I);
435
436 for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
437 Value *Op = I->getOperand(OpIdx);
438 LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
439 assert((!Op->hasUseList() || !Op->use_empty()) &&
440 "No uses, so how did we get to it?!");
441
442 // If this is a binary operation of the right kind with only one use then
443 // add its operands to the expression.
444 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
445 assert(Visited.insert(Op).second && "Not first visit!");
446 LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
447 Worklist.push_back(std::make_pair(BO, Weight));
448 continue;
449 }
450
451 // Appears to be a leaf. Is the operand already in the set of leaves?
452 LeafMap::iterator It = Leaves.find(Op);
453 if (It == Leaves.end()) {
454 // Not in the leaf map. Must be the first time we saw this operand.
455 assert(Visited.insert(Op).second && "Not first visit!");
456 if (!Op->hasOneUse()) {
457 // This value has uses not accounted for by the expression, so it is
458 // not safe to modify. Mark it as being a leaf.
460 << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
461 LeafOrder.push_back(Op);
462 Leaves[Op] = Weight;
463 continue;
464 }
465 // No uses outside the expression, try morphing it.
466 } else {
467 // Already in the leaf map.
468 assert(It != Leaves.end() && Visited.count(Op) &&
469 "In leaf map but not visited!");
470
471 // Update the number of paths to the leaf.
472 It->second += Weight;
473 assert(It->second >= Weight && "Weight overflows");
474
475 // If we still have uses that are not accounted for by the expression
476 // then it is not safe to modify the value.
477 if (!Op->hasOneUse())
478 continue;
479
480 // No uses outside the expression, try morphing it.
481 Weight = It->second;
482 Leaves.erase(It); // Since the value may be morphed below.
483 }
484
485 // At this point we have a value which, first of all, is not a binary
486 // expression of the right kind, and secondly, is only used inside the
487 // expression. This means that it can safely be modified. See if we
488 // can usefully morph it into an expression of the right kind.
490 cast<Instruction>(Op)->getOpcode() != Opcode
491 || (isa<FPMathOperator>(Op) &&
493 "Should have been handled above!");
494 assert(Op->hasOneUse() && "Has uses outside the expression tree!");
495
496 // If this is a multiply expression, turn any internal negations into
497 // multiplies by -1 so they can be reassociated. Add any users of the
498 // newly created multiplication by -1 to the redo list, so any
499 // reassociation opportunities that are exposed will be reassociated
500 // further.
501 Instruction *Neg;
502 if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
503 (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
504 match(Op, m_Instruction(Neg))) {
506 << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
508 LLVM_DEBUG(dbgs() << *Mul << '\n');
509 Worklist.push_back(std::make_pair(Mul, Weight));
510 for (User *U : Mul->users()) {
512 ToRedo.insert(UserBO);
513 }
514 ToRedo.insert(Neg);
515 Changed = true;
516 continue;
517 }
518
519 // Failed to morph into an expression of the right type. This really is
520 // a leaf.
521 LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
522 assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
523 LeafOrder.push_back(Op);
524 Leaves[Op] = Weight;
525 }
526 }
527
528 // The leaves, repeated according to their weights, represent the linearized
529 // form of the expression.
530 for (Value *V : LeafOrder) {
531 LeafMap::iterator It = Leaves.find(V);
532 if (It == Leaves.end())
533 // Node initially thought to be a leaf wasn't.
534 continue;
535 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
536 uint64_t Weight = It->second;
537 // Ensure the leaf is only output once.
538 It->second = 0;
539 Ops.push_back(std::make_pair(V, Weight));
540 if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW)
541 Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
542 else if (Opcode == Instruction::Mul) {
543 // To preserve NUW we need all inputs non-zero.
544 // To preserve NSW we need all inputs strictly positive.
545 if (Flags.AllKnownNonZero &&
546 (Flags.HasNUW || (Flags.HasNSW && Flags.AllKnownNonNegative))) {
547 Flags.AllKnownNonZero &= isKnownNonZero(V, SimplifyQuery(DL));
548 if (Flags.HasNSW && Flags.AllKnownNonNegative)
549 Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
550 }
551 }
552 }
553
554 // For nilpotent operations or addition there may be no operands, for example
555 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
556 // in both cases the weight reduces to 0 causing the value to be skipped.
557 if (Ops.empty()) {
558 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
559 assert(Identity && "Associative operation without identity!");
560 Ops.emplace_back(Identity, 1);
561 }
562
563 return Changed;
564}
565
566/// Now that the operands for this expression tree are
567/// linearized and optimized, emit them in-order.
568void ReassociatePass::RewriteExprTree(BinaryOperator *I,
569 SmallVectorImpl<ValueEntry> &Ops,
570 OverflowTracking Flags) {
571 assert(Ops.size() > 1 && "Single values should be used directly!");
572
573 // Since our optimizations should never increase the number of operations, the
574 // new expression can usually be written reusing the existing binary operators
575 // from the original expression tree, without creating any new instructions,
576 // though the rewritten expression may have a completely different topology.
577 // We take care to not change anything if the new expression will be the same
578 // as the original. If more than trivial changes (like commuting operands)
579 // were made then we are obliged to clear out any optional subclass data like
580 // nsw flags.
581
582 /// NodesToRewrite - Nodes from the original expression available for writing
583 /// the new expression into.
584 SmallVector<BinaryOperator*, 8> NodesToRewrite;
585 unsigned Opcode = I->getOpcode();
586 BinaryOperator *Op = I;
587
588 /// NotRewritable - The operands being written will be the leaves of the new
589 /// expression and must not be used as inner nodes (via NodesToRewrite) by
590 /// mistake. Inner nodes are always reassociable, and usually leaves are not
591 /// (if they were they would have been incorporated into the expression and so
592 /// would not be leaves), so most of the time there is no danger of this. But
593 /// in rare cases a leaf may become reassociable if an optimization kills uses
594 /// of it, or it may momentarily become reassociable during rewriting (below)
595 /// due it being removed as an operand of one of its uses. Ensure that misuse
596 /// of leaf nodes as inner nodes cannot occur by remembering all of the future
597 /// leaves and refusing to reuse any of them as inner nodes.
598 SmallPtrSet<Value*, 8> NotRewritable;
599 for (const ValueEntry &Op : Ops)
600 NotRewritable.insert(Op.Op);
601
602 // ExpressionChangedStart - Non-null if the rewritten expression differs from
603 // the original in some non-trivial way, requiring the clearing of optional
604 // flags. Flags are cleared from the operator in ExpressionChangedStart up to
605 // ExpressionChangedEnd inclusive.
606 BinaryOperator *ExpressionChangedStart = nullptr,
607 *ExpressionChangedEnd = nullptr;
608 for (unsigned i = 0; ; ++i) {
609 // The last operation (which comes earliest in the IR) is special as both
610 // operands will come from Ops, rather than just one with the other being
611 // a subexpression.
612 if (i+2 == Ops.size()) {
613 Value *NewLHS = Ops[i].Op;
614 Value *NewRHS = Ops[i+1].Op;
615 Value *OldLHS = Op->getOperand(0);
616 Value *OldRHS = Op->getOperand(1);
617
618 if (NewLHS == OldLHS && NewRHS == OldRHS)
619 // Nothing changed, leave it alone.
620 break;
621
622 if (NewLHS == OldRHS && NewRHS == OldLHS) {
623 // The order of the operands was reversed. Swap them.
624 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
625 Op->swapOperands();
626 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
627 MadeChange = true;
628 ++NumChanged;
629 break;
630 }
631
632 // The new operation differs non-trivially from the original. Overwrite
633 // the old operands with the new ones.
634 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
635 if (NewLHS != OldLHS) {
636 BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
637 if (BO && !NotRewritable.count(BO))
638 NodesToRewrite.push_back(BO);
640 Op->setOperand(0, NewLHS);
641 }
642 if (NewRHS != OldRHS) {
643 BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
644 if (BO && !NotRewritable.count(BO))
645 NodesToRewrite.push_back(BO);
647 Op->setOperand(1, NewRHS);
648 }
649 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
650
651 ExpressionChangedStart = Op;
652 if (!ExpressionChangedEnd)
653 ExpressionChangedEnd = Op;
654 MadeChange = true;
655 ++NumChanged;
656
657 break;
658 }
659
660 // Not the last operation. The left-hand side will be a sub-expression
661 // while the right-hand side will be the current element of Ops.
662 Value *NewRHS = Ops[i].Op;
663 if (NewRHS != Op->getOperand(1)) {
664 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
665 if (NewRHS == Op->getOperand(0)) {
666 // The new right-hand side was already present as the left operand. If
667 // we are lucky then swapping the operands will sort out both of them.
668 Op->swapOperands();
669 } else {
670 // Overwrite with the new right-hand side.
671 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
672 if (BO && !NotRewritable.count(BO))
673 NodesToRewrite.push_back(BO);
675 Op->setOperand(1, NewRHS);
676 ExpressionChangedStart = Op;
677 if (!ExpressionChangedEnd)
678 ExpressionChangedEnd = Op;
679 }
680 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
681 MadeChange = true;
682 ++NumChanged;
683 }
684
685 // Now deal with the left-hand side. If this is already an operation node
686 // from the original expression then just rewrite the rest of the expression
687 // into it.
688 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
689 if (BO && !NotRewritable.count(BO)) {
690 Op = BO;
691 continue;
692 }
693
694 // Otherwise, grab a spare node from the original expression and use that as
695 // the left-hand side. If there are no nodes left then the optimizers made
696 // an expression with more nodes than the original! This usually means that
697 // they did something stupid but it might mean that the problem was just too
698 // hard (finding the mimimal number of multiplications needed to realize a
699 // multiplication expression is NP-complete). Whatever the reason, smart or
700 // stupid, create a new node if there are none left.
701 BinaryOperator *NewOp;
702 if (NodesToRewrite.empty()) {
703 Constant *Poison = PoisonValue::get(I->getType());
705 Poison, "", I->getIterator());
706 if (isa<FPMathOperator>(NewOp))
707 NewOp->setFastMathFlags(I->getFastMathFlags());
708 } else {
709 NewOp = NodesToRewrite.pop_back_val();
710 }
711
712 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
714 Op->setOperand(0, NewOp);
715 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
716 ExpressionChangedStart = Op;
717 if (!ExpressionChangedEnd)
718 ExpressionChangedEnd = Op;
719 MadeChange = true;
720 ++NumChanged;
721 Op = NewOp;
722 }
723
724 // If the expression changed non-trivially then clear out all subclass data
725 // starting from the operator specified in ExpressionChanged, and compactify
726 // the operators to just before the expression root to guarantee that the
727 // expression tree is dominated by all of Ops.
728 if (ExpressionChangedStart) {
729 bool ClearFlags = true;
730 do {
731 // Preserve flags.
732 if (ClearFlags) {
733 if (isa<FPMathOperator>(I)) {
734 ExpressionChangedStart->copyFastMathFlags(I->getFastMathFlags());
735 } else {
736 Flags.applyFlags(*ExpressionChangedStart);
737 }
738 }
739
740 if (ExpressionChangedStart == ExpressionChangedEnd)
741 ClearFlags = false;
742 if (ExpressionChangedStart == I)
743 break;
744
745 ExpressionChangedStart->moveBefore(I->getIterator());
746 ExpressionChangedStart =
747 cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
748 } while (true);
749 }
750
751 // Throw away any left over nodes from the original expression.
752 RedoInsts.insert_range(NodesToRewrite);
753}
754
755/// Insert instructions before the instruction pointed to by BI,
756/// that computes the negative version of the value specified. The negative
757/// version of the value is returned, and BI is left pointing at the instruction
758/// that should be processed next by the reassociation pass.
759/// Also add intermediate instructions to the redo list that are modified while
760/// pushing the negates through adds. These will be revisited to see if
761/// additional opportunities have been exposed.
764 if (auto *C = dyn_cast<Constant>(V)) {
765 const DataLayout &DL = BI->getDataLayout();
766 Constant *Res = C->getType()->isFPOrFPVectorTy()
767 ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
769 if (Res)
770 return Res;
771 }
772
773 // We are trying to expose opportunity for reassociation. One of the things
774 // that we want to do to achieve this is to push a negation as deep into an
775 // expression chain as possible, to expose the add instructions. In practice,
776 // this means that we turn this:
777 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
778 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
779 // the constants. We assume that instcombine will clean up the mess later if
780 // we introduce tons of unnecessary negation instructions.
781 //
782 if (BinaryOperator *I =
783 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
784 // Push the negates through the add.
785 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
786 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
787 if (I->getOpcode() == Instruction::Add) {
788 I->setHasNoUnsignedWrap(false);
789 I->setHasNoSignedWrap(false);
790 }
791
792 // We must move the add instruction here, because the neg instructions do
793 // not dominate the old add instruction in general. By moving it, we are
794 // assured that the neg instructions we just inserted dominate the
795 // instruction we are about to insert after them.
796 //
797 I->moveBefore(BI->getIterator());
798 I->setName(I->getName()+".neg");
799
800 // Add the intermediate negates to the redo list as processing them later
801 // could expose more reassociating opportunities.
802 ToRedo.insert(I);
803 return I;
804 }
805
806 // Okay, we need to materialize a negated version of V with an instruction.
807 // Scan the use lists of V to see if we have one already.
808 for (User *U : V->users()) {
809 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
810 continue;
811
812 // We found one! Now we have to make sure that the definition dominates
813 // this use. We do this by moving it to the entry block (if it is a
814 // non-instruction value) or right after the definition. These negates will
815 // be zapped by reassociate later, so we don't need much finesse here.
817
818 // We can't safely propagate a vector zero constant with poison/undef lanes.
819 Constant *C;
820 if (match(TheNeg, m_BinOp(m_Constant(C), m_Value())) &&
821 C->containsUndefOrPoisonElement())
822 continue;
823
824 // Verify that the negate is in this function, V might be a constant expr.
825 if (!TheNeg ||
826 TheNeg->getParent()->getParent() != BI->getParent()->getParent())
827 continue;
828
829 BasicBlock::iterator InsertPt;
830 if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
831 auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
832 if (!InsertPtOpt)
833 continue;
834 InsertPt = *InsertPtOpt;
835 } else {
836 InsertPt = TheNeg->getFunction()
837 ->getEntryBlock()
839 ->getIterator();
840 }
841
842 // Check that if TheNeg is moved out of its parent block, we drop its
843 // debug location to avoid extra coverage.
844 // See test dropping_debugloc_the_neg.ll for a detailed example.
845 if (TheNeg->getParent() != InsertPt->getParent())
846 TheNeg->dropLocation();
847 TheNeg->moveBefore(*InsertPt->getParent(), InsertPt);
848
849 if (TheNeg->getOpcode() == Instruction::Sub) {
850 TheNeg->setHasNoUnsignedWrap(false);
851 TheNeg->setHasNoSignedWrap(false);
852 } else {
853 TheNeg->andIRFlags(BI);
854 }
855 ToRedo.insert(TheNeg);
856 return TheNeg;
857 }
858
859 // Insert a 'neg' instruction that subtracts the value from zero to get the
860 // negation.
861 Instruction *NewNeg =
862 CreateNeg(V, V->getName() + ".neg", BI->getIterator(), BI);
863 // NewNeg is generated to potentially replace BI, so use its DebugLoc.
864 NewNeg->setDebugLoc(BI->getDebugLoc());
865 ToRedo.insert(NewNeg);
866 return NewNeg;
867}
868
869// See if this `or` looks like an load widening reduction, i.e. that it
870// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
871// ensure that the pattern is *really* a load widening reduction,
872// we do not ensure that it can really be replaced with a widened load,
873// only that it mostly looks like one.
877
878 auto Enqueue = [&](Value *V) {
879 auto *I = dyn_cast<Instruction>(V);
880 // Each node of an `or` reduction must be an instruction,
881 if (!I)
882 return false; // Node is certainly not part of an `or` load reduction.
883 // Only process instructions we have never processed before.
884 if (Visited.insert(I).second)
885 Worklist.emplace_back(I);
886 return true; // Will need to look at parent nodes.
887 };
888
889 if (!Enqueue(Or))
890 return false; // Not an `or` reduction pattern.
891
892 while (!Worklist.empty()) {
893 auto *I = Worklist.pop_back_val();
894
895 // Okay, which instruction is this node?
896 switch (I->getOpcode()) {
897 case Instruction::Or:
898 // Got an `or` node. That's fine, just recurse into it's operands.
899 for (Value *Op : I->operands())
900 if (!Enqueue(Op))
901 return false; // Not an `or` reduction pattern.
902 continue;
903
904 case Instruction::Shl:
905 case Instruction::ZExt:
906 // `shl`/`zext` nodes are fine, just recurse into their base operand.
907 if (!Enqueue(I->getOperand(0)))
908 return false; // Not an `or` reduction pattern.
909 continue;
910
911 case Instruction::Load:
912 // Perfect, `load` node means we've reached an edge of the graph.
913 continue;
914
915 default: // Unknown node.
916 return false; // Not an `or` reduction pattern.
917 }
918 }
919
920 return true;
921}
922
923/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
925 // Don't bother to convert this up unless either the LHS is an associable add
926 // or subtract or mul or if this is only used by one of the above.
927 // This is only a compile-time improvement, it is not needed for correctness!
928 auto isInteresting = [](Value *V) {
929 for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
930 Instruction::Shl})
931 if (isReassociableOp(V, Op))
932 return true;
933 return false;
934 };
935
936 if (any_of(Or->operands(), isInteresting))
937 return true;
938
939 Value *VB = Or->user_back();
940 if (Or->hasOneUse() && isInteresting(VB))
941 return true;
942
943 return false;
944}
945
946/// If we have (X|Y), and iff X and Y have no common bits set,
947/// transform this into (X+Y) to allow arithmetics reassociation.
949 // Convert an or into an add.
950 BinaryOperator *New = CreateAdd(Or->getOperand(0), Or->getOperand(1), "",
951 Or->getIterator(), Or);
952 New->setHasNoSignedWrap();
953 New->setHasNoUnsignedWrap();
954 New->takeName(Or);
955
956 // Everyone now refers to the add instruction.
957 Or->replaceAllUsesWith(New);
958 New->setDebugLoc(Or->getDebugLoc());
959
960 LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
961 return New;
962}
963
964/// Return true if Mul is of the form (X+Y)*C or (X-Y)*C where C is a
965/// constant, and there exists a sibling instruction of the form X*C' or Y*C'
966/// in the same expression — indicating that distribution followed by
967/// factoring will reduce the instruction count.
969 Value *A, *B;
970 if (!match(Mul, m_OneUse(m_Mul(
972 m_Sub(m_Value(A), m_Value(B)))),
973 m_ImmConstant()))))
974 return false;
975
976 auto *MulUser = cast<Instruction>(Mul->user_back());
977 // The parent MUST be an Add or Sub to ensure the tree is flattened
978 if (MulUser->getOpcode() != Instruction::Add &&
979 MulUser->getOpcode() != Instruction::Sub)
980 return false;
981
982 for (Value *Sibling : MulUser->operands()) {
983 if (Sibling == Mul || !Sibling->hasOneUse())
984 continue;
985
986 // Sibling must be NonConst * C'.
987 Value *SibNC;
988 if (match(Sibling, m_Mul(m_Value(SibNC), m_ImmConstant())) &&
989 (SibNC == A || SibNC == B) && !isa<Constant>(SibNC))
990 return true;
991 }
992 return false;
993}
994
995/// Distribute Mul of the form (X+Y)*C into X*C + Y*C.
996/// For the sub case (X-Y)*C, the second term uses -C to avoid
997/// introducing a negation instruction.
1000 Instruction *AddSub = cast<Instruction>(Mul->getOperand(0));
1001 Constant *C = cast<Constant>(Mul->getOperand(1));
1002 Constant *C2 =
1003 AddSub->getOpcode() == Instruction::Sub ? ConstantExpr::getNeg(C) : C;
1004
1005 BinaryOperator *M1 = BinaryOperator::CreateMul(AddSub->getOperand(0), C,
1006 "Mul1", Mul->getIterator());
1007 BinaryOperator *M2 = BinaryOperator::CreateMul(AddSub->getOperand(1), C2,
1008 "Mul2", Mul->getIterator());
1009 BinaryOperator *Result =
1010 BinaryOperator::CreateAdd(M1, M2, "DistAdd", Mul->getIterator());
1011
1012 Mul->replaceAllUsesWith(Result);
1013 Result->setDebugLoc(Mul->getDebugLoc());
1014
1015 ToRedo.insert(M1);
1016 ToRedo.insert(M2);
1017 ToRedo.insert(Result);
1018
1019 return Result;
1020}
1021
1022/// Return true if we should break up this subtract of X-Y into (X + -Y).
1024 // If this is a negation, we can't split it up!
1025 if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
1026 return false;
1027
1028 // Don't breakup X - undef.
1029 if (isa<UndefValue>(Sub->getOperand(1)))
1030 return false;
1031
1032 // Don't bother to break this up unless either the LHS is an associable add or
1033 // subtract or if this is only used by one.
1034 Value *V0 = Sub->getOperand(0);
1035 if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
1036 isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
1037 return true;
1038 Value *V1 = Sub->getOperand(1);
1039 if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
1040 isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
1041 return true;
1042 Value *VB = Sub->user_back();
1043 if (Sub->hasOneUse() &&
1044 (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
1045 isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
1046 return true;
1047
1048 return false;
1049}
1050
1051/// If we have (X-Y), and if either X is an add, or if this is only used by an
1052/// add, transform this into (X+(0-Y)) to promote better reassociation.
1055 // Convert a subtract into an add and a neg instruction. This allows sub
1056 // instructions to be commuted with other add instructions.
1057 //
1058 // Calculate the negative value of Operand 1 of the sub instruction,
1059 // and set it as the RHS of the add instruction we just made.
1060 Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1061 BinaryOperator *New =
1062 CreateAdd(Sub->getOperand(0), NegVal, "", Sub->getIterator(), Sub);
1063 Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1064 Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1065 New->takeName(Sub);
1066
1067 // Everyone now refers to the add instruction.
1068 Sub->replaceAllUsesWith(New);
1069 New->setDebugLoc(Sub->getDebugLoc());
1070
1071 LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1072 return New;
1073}
1074
1075/// If this is a shift of a reassociable multiply or is used by one, change
1076/// this into a multiply by a constant to assist with further reassociation.
1078 Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1079 auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1080 MulCst = ConstantFoldBinaryInstruction(Instruction::Shl, MulCst, SA);
1081 assert(MulCst && "Constant folding of immediate constants failed");
1082
1083 BinaryOperator *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
1084 "", Shl->getIterator());
1085 Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
1086 Mul->takeName(Shl);
1087
1088 // Everyone now refers to the mul instruction.
1089 Shl->replaceAllUsesWith(Mul);
1090 Mul->setDebugLoc(Shl->getDebugLoc());
1091
1092 // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1093 // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1094 // handling. It can be preserved as long as we're not left shifting by
1095 // bitwidth - 1.
1096 bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1097 bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1098 unsigned BitWidth = Shl->getType()->getScalarSizeInBits();
1099 if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1100 Mul->setHasNoSignedWrap(true);
1101 Mul->setHasNoUnsignedWrap(NUW);
1102 return Mul;
1103}
1104
1105/// Scan backwards and forwards among values with the same rank as element i
1106/// to see if X exists. If X does not exist, return i. This is useful when
1107/// scanning for 'x' when we see '-x' because they both get the same rank.
1109 unsigned i, Value *X) {
1110 unsigned XRank = Ops[i].Rank;
1111 unsigned e = Ops.size();
1112 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1113 if (Ops[j].Op == X)
1114 return j;
1117 if (I1->isIdenticalTo(I2))
1118 return j;
1119 }
1120 // Scan backwards.
1121 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1122 if (Ops[j].Op == X)
1123 return j;
1126 if (I1->isIdenticalTo(I2))
1127 return j;
1128 }
1129 return i;
1130}
1131
1132/// Emit a tree of add instructions, summing Ops together
1133/// and returning the result. Insert the tree before I.
1136 if (Ops.size() == 1) return Ops.back();
1137
1138 Value *V1 = Ops.pop_back_val();
1140 auto *NewAdd = CreateAdd(V2, V1, "reass.add", I->getIterator(), I);
1141 NewAdd->setDebugLoc(I->getDebugLoc());
1142 return NewAdd;
1143}
1144
1145/// If V is an expression tree that is a multiplication sequence,
1146/// and if this sequence contains a multiply by Factor,
1147/// remove Factor from the tree and return the new tree.
1148/// If new instructions are inserted to generate this tree, DL should be used
1149/// as the DebugLoc for these instructions.
1150Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor,
1151 DebugLoc DL) {
1152 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1153 if (!BO)
1154 return nullptr;
1155
1157 OverflowTracking Flags;
1158 MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, Flags);
1160 Factors.reserve(Tree.size());
1161 for (const RepeatedValue &E : Tree)
1162 Factors.append(E.second, ValueEntry(getRank(E.first), E.first));
1163
1164 bool FoundFactor = false;
1165 bool NeedsNegate = false;
1166 for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1167 if (Factors[i].Op == Factor) {
1168 FoundFactor = true;
1169 Factors.erase(Factors.begin()+i);
1170 break;
1171 }
1172
1173 // If this is a negative version of this factor, remove it.
1174 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1175 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1176 if (FC1->getValue() == -FC2->getValue()) {
1177 FoundFactor = NeedsNegate = true;
1178 Factors.erase(Factors.begin()+i);
1179 break;
1180 }
1181 } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1182 if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
1183 const APFloat &F1 = FC1->getValueAPF();
1184 APFloat F2(FC2->getValueAPF());
1185 F2.changeSign();
1186 if (F1 == F2) {
1187 FoundFactor = NeedsNegate = true;
1188 Factors.erase(Factors.begin() + i);
1189 break;
1190 }
1191 }
1192 }
1193 }
1194
1195 if (!FoundFactor) {
1196 // Make sure to restore the operands to the expression tree.
1197 RewriteExprTree(BO, Factors, Flags);
1198 return nullptr;
1199 }
1200
1201 BasicBlock::iterator InsertPt = ++BO->getIterator();
1202
1203 // If this was just a single multiply, remove the multiply and return the only
1204 // remaining operand.
1205 if (Factors.size() == 1) {
1206 RedoInsts.insert(BO);
1207 V = Factors[0].Op;
1208 } else {
1209 RewriteExprTree(BO, Factors, Flags);
1210 V = BO;
1211 }
1212
1213 if (NeedsNegate) {
1214 V = CreateNeg(V, "neg", InsertPt, BO);
1215 cast<Instruction>(V)->setDebugLoc(DL);
1216 }
1217
1218 return V;
1219}
1220
1221/// If V is a single-use multiply, recursively add its operands as factors,
1222/// otherwise add V to the list of factors.
1223///
1224/// Ops is the top-level list of add operands we're trying to factor.
1226 SmallVectorImpl<Value*> &Factors) {
1227 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1228 if (!BO) {
1229 Factors.push_back(V);
1230 return;
1231 }
1232
1233 // Otherwise, add the LHS and RHS to the list of factors.
1236}
1237
1238/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1239/// This optimizes based on identities. If it can be reduced to a single Value,
1240/// it is returned, otherwise the Ops list is mutated as necessary.
1241static Value *OptimizeAndOrXor(unsigned Opcode,
1243 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1244 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1245 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1246 // First, check for X and ~X in the operand list.
1247 assert(i < Ops.size());
1248 Value *X;
1249 if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1250 unsigned FoundX = FindInOperandList(Ops, i, X);
1251 if (FoundX != i) {
1252 if (Opcode == Instruction::And) // ...&X&~X = 0
1253 return Constant::getNullValue(X->getType());
1254
1255 if (Opcode == Instruction::Or) // ...|X|~X = -1
1256 return Constant::getAllOnesValue(X->getType());
1257 }
1258 }
1259
1260 // Next, check for duplicate pairs of values, which we assume are next to
1261 // each other, due to our sorting criteria.
1262 assert(i < Ops.size());
1263 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1264 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1265 // Drop duplicate values for And and Or.
1266 Ops.erase(Ops.begin()+i);
1267 --i; --e;
1268 ++NumAnnihil;
1269 continue;
1270 }
1271
1272 // Drop pairs of values for Xor.
1273 assert(Opcode == Instruction::Xor);
1274 if (e == 2)
1275 return Constant::getNullValue(Ops[0].Op->getType());
1276
1277 // Y ^ X^X -> Y
1278 Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1279 i -= 1; e -= 2;
1280 ++NumAnnihil;
1281 }
1282 }
1283 return nullptr;
1284}
1285
1286/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1287/// instruction with the given two operands, and return the resulting
1288/// instruction. There are two special cases: 1) if the constant operand is 0,
1289/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1290/// be returned.
1292 const APInt &ConstOpnd) {
1293 if (ConstOpnd.isZero())
1294 return nullptr;
1295
1296 if (ConstOpnd.isAllOnes())
1297 return Opnd;
1298
1299 Instruction *I = BinaryOperator::CreateAnd(
1300 Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1301 InsertBefore);
1302 I->setDebugLoc(InsertBefore->getDebugLoc());
1303 return I;
1304}
1305
1306// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1307// into "R ^ C", where C would be 0, and R is a symbolic value.
1308//
1309// If it was successful, true is returned, and the "R" and "C" is returned
1310// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1311// and both "Res" and "ConstOpnd" remain unchanged.
1312bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1313 APInt &ConstOpnd, Value *&Res) {
1314 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1315 // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1316 // = (x & ~c1) ^ (c1 ^ c2)
1317 // It is useful only when c1 == c2.
1318 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1319 return false;
1320
1321 if (!Opnd1->getValue()->hasOneUse())
1322 return false;
1323
1324 const APInt &C1 = Opnd1->getConstPart();
1325 if (C1 != ConstOpnd)
1326 return false;
1327
1328 Value *X = Opnd1->getSymbolicPart();
1329 Res = createAndInstr(It, X, ~C1);
1330 // ConstOpnd was C2, now C1 ^ C2.
1331 ConstOpnd ^= C1;
1332
1333 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1334 RedoInsts.insert(T);
1335 return true;
1336}
1337
1338// Helper function of OptimizeXor(). It tries to simplify
1339// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1340// symbolic value.
1341//
1342// If it was successful, true is returned, and the "R" and "C" is returned
1343// via "Res" and "ConstOpnd", respectively (If the entire expression is
1344// evaluated to a constant, the Res is set to NULL); otherwise, false is
1345// returned, and both "Res" and "ConstOpnd" remain unchanged.
1346bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1347 XorOpnd *Opnd2, APInt &ConstOpnd,
1348 Value *&Res) {
1349 Value *X = Opnd1->getSymbolicPart();
1350 if (X != Opnd2->getSymbolicPart())
1351 return false;
1352
1353 // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1354 int DeadInstNum = 1;
1355 if (Opnd1->getValue()->hasOneUse())
1356 DeadInstNum++;
1357 if (Opnd2->getValue()->hasOneUse())
1358 DeadInstNum++;
1359
1360 // Xor-Rule 2:
1361 // (x | c1) ^ (x & c2)
1362 // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1363 // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1364 // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1365 //
1366 if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1367 if (Opnd2->isOrExpr())
1368 std::swap(Opnd1, Opnd2);
1369
1370 const APInt &C1 = Opnd1->getConstPart();
1371 const APInt &C2 = Opnd2->getConstPart();
1372 APInt C3((~C1) ^ C2);
1373
1374 // Do not increase code size!
1375 if (!C3.isZero() && !C3.isAllOnes()) {
1376 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1377 if (NewInstNum > DeadInstNum)
1378 return false;
1379 }
1380
1381 Res = createAndInstr(It, X, C3);
1382 ConstOpnd ^= C1;
1383 } else if (Opnd1->isOrExpr()) {
1384 // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1385 //
1386 const APInt &C1 = Opnd1->getConstPart();
1387 const APInt &C2 = Opnd2->getConstPart();
1388 APInt C3 = C1 ^ C2;
1389
1390 // Do not increase code size
1391 if (!C3.isZero() && !C3.isAllOnes()) {
1392 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1393 if (NewInstNum > DeadInstNum)
1394 return false;
1395 }
1396
1397 Res = createAndInstr(It, X, C3);
1398 ConstOpnd ^= C3;
1399 } else {
1400 // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1401 //
1402 const APInt &C1 = Opnd1->getConstPart();
1403 const APInt &C2 = Opnd2->getConstPart();
1404 APInt C3 = C1 ^ C2;
1405 Res = createAndInstr(It, X, C3);
1406 }
1407
1408 // Put the original operands in the Redo list; hope they will be deleted
1409 // as dead code.
1410 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1411 RedoInsts.insert(T);
1412 if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1413 RedoInsts.insert(T);
1414
1415 return true;
1416}
1417
1418/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1419/// to a single Value, it is returned, otherwise the Ops list is mutated as
1420/// necessary.
1421Value *ReassociatePass::OptimizeXor(Instruction *I,
1422 SmallVectorImpl<ValueEntry> &Ops) {
1423 if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1424 return V;
1425
1426 if (Ops.size() == 1)
1427 return nullptr;
1428
1430 SmallVector<XorOpnd*, 8> OpndPtrs;
1431 Type *Ty = Ops[0].Op->getType();
1432 APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1433
1434 // Step 1: Convert ValueEntry to XorOpnd
1435 for (const ValueEntry &Op : Ops) {
1436 Value *V = Op.Op;
1437 const APInt *C;
1438 // TODO: Support non-splat vectors.
1439 if (match(V, m_APInt(C))) {
1440 ConstOpnd ^= *C;
1441 } else {
1442 XorOpnd O(V);
1443 O.setSymbolicRank(getRank(O.getSymbolicPart()));
1444 Opnds.push_back(O);
1445 }
1446 }
1447
1448 // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1449 // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1450 // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1451 // with the previous loop --- the iterator of the "Opnds" may be invalidated
1452 // when new elements are added to the vector.
1453 for (XorOpnd &Op : Opnds)
1454 OpndPtrs.push_back(&Op);
1455
1456 // Step 2: Sort the Xor-Operands in a way such that the operands containing
1457 // the same symbolic value cluster together. For instance, the input operand
1458 // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1459 // ("x | 123", "x & 789", "y & 456").
1460 //
1461 // The purpose is twofold:
1462 // 1) Cluster together the operands sharing the same symbolic-value.
1463 // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1464 // could potentially shorten crital path, and expose more loop-invariants.
1465 // Note that values' rank are basically defined in RPO order (FIXME).
1466 // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1467 // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1468 // "z" in the order of X-Y-Z is better than any other orders.
1469 llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1470 return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1471 });
1472
1473 // Step 3: Combine adjacent operands
1474 XorOpnd *PrevOpnd = nullptr;
1475 bool Changed = false;
1476 for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1477 XorOpnd *CurrOpnd = OpndPtrs[i];
1478 // The combined value
1479 Value *CV;
1480
1481 // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1482 if (!ConstOpnd.isZero() &&
1483 CombineXorOpnd(I->getIterator(), CurrOpnd, ConstOpnd, CV)) {
1484 Changed = true;
1485 if (CV)
1486 *CurrOpnd = XorOpnd(CV);
1487 else {
1488 CurrOpnd->Invalidate();
1489 continue;
1490 }
1491 }
1492
1493 if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1494 PrevOpnd = CurrOpnd;
1495 continue;
1496 }
1497
1498 // step 3.2: When previous and current operands share the same symbolic
1499 // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1500 if (CombineXorOpnd(I->getIterator(), CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1501 // Remove previous operand
1502 PrevOpnd->Invalidate();
1503 if (CV) {
1504 *CurrOpnd = XorOpnd(CV);
1505 PrevOpnd = CurrOpnd;
1506 } else {
1507 CurrOpnd->Invalidate();
1508 PrevOpnd = nullptr;
1509 }
1510 Changed = true;
1511 }
1512 }
1513
1514 // Step 4: Reassemble the Ops
1515 if (Changed) {
1516 Ops.clear();
1517 for (const XorOpnd &O : Opnds) {
1518 if (O.isInvalid())
1519 continue;
1520 ValueEntry VE(getRank(O.getValue()), O.getValue());
1521 Ops.push_back(VE);
1522 }
1523 if (!ConstOpnd.isZero()) {
1524 Value *C = ConstantInt::get(Ty, ConstOpnd);
1525 ValueEntry VE(getRank(C), C);
1526 Ops.push_back(VE);
1527 }
1528 unsigned Sz = Ops.size();
1529 if (Sz == 1)
1530 return Ops.back().Op;
1531 if (Sz == 0) {
1532 assert(ConstOpnd.isZero());
1533 return ConstantInt::get(Ty, ConstOpnd);
1534 }
1535 }
1536
1537 return nullptr;
1538}
1539
1540/// Optimize a series of operands to an 'add' instruction. This
1541/// optimizes based on identities. If it can be reduced to a single Value, it
1542/// is returned, otherwise the Ops list is mutated as necessary.
1543Value *ReassociatePass::OptimizeAdd(Instruction *I,
1544 SmallVectorImpl<ValueEntry> &Ops) {
1545 // Scan the operand lists looking for X and -X pairs. If we find any, we
1546 // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1547 // scan for any
1548 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1549
1550 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1551 Value *TheOp = Ops[i].Op;
1552 // Check to see if we've seen this operand before. If so, we factor all
1553 // instances of the operand together. Due to our sorting criteria, we know
1554 // that these need to be next to each other in the vector.
1555 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1556 // Rescan the list, remove all instances of this operand from the expr.
1557 unsigned NumFound = 0;
1558 do {
1559 Ops.erase(Ops.begin()+i);
1560 ++NumFound;
1561 } while (i != Ops.size() && Ops[i].Op == TheOp);
1562
1563 LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1564 << '\n');
1565 ++NumFactor;
1566
1567 // Insert a new multiply.
1568 Type *Ty = TheOp->getType();
1569 // Truncate if NumFound overflows the type.
1571 ? ConstantInt::get(Ty, NumFound, /*IsSigned=*/false,
1572 /*ImplicitTrunc=*/true)
1573 : ConstantFP::get(Ty, NumFound);
1574 Instruction *Mul = CreateMul(TheOp, C, "factor", I->getIterator(), I);
1575 Mul->setDebugLoc(I->getDebugLoc());
1576
1577 // Now that we have inserted a multiply, optimize it. This allows us to
1578 // handle cases that require multiple factoring steps, such as this:
1579 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1580 RedoInsts.insert(Mul);
1581
1582 // If every add operand was a duplicate, return the multiply.
1583 if (Ops.empty())
1584 return Mul;
1585
1586 // Otherwise, we had some input that didn't have the dupe, such as
1587 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1588 // things being added by this operation.
1589 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1590
1591 --i;
1592 e = Ops.size();
1593 continue;
1594 }
1595
1596 // Check for X and -X or X and ~X in the operand list.
1597 Value *X;
1598 if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1599 !match(TheOp, m_FNeg(m_Value(X))))
1600 continue;
1601
1602 unsigned FoundX = FindInOperandList(Ops, i, X);
1603 if (FoundX == i)
1604 continue;
1605
1606 // Remove X and -X from the operand list.
1607 if (Ops.size() == 2 &&
1608 (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1609 return Constant::getNullValue(X->getType());
1610
1611 // Remove X and ~X from the operand list.
1612 if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1613 return Constant::getAllOnesValue(X->getType());
1614
1615 Ops.erase(Ops.begin()+i);
1616 if (i < FoundX)
1617 --FoundX;
1618 else
1619 --i; // Need to back up an extra one.
1620 Ops.erase(Ops.begin()+FoundX);
1621 ++NumAnnihil;
1622 --i; // Revisit element.
1623 e -= 2; // Removed two elements.
1624
1625 // if X and ~X we append -1 to the operand list.
1626 if (match(TheOp, m_Not(m_Value()))) {
1627 Value *V = Constant::getAllOnesValue(X->getType());
1628 Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1629 e += 1;
1630 }
1631 }
1632
1633 // Scan the operand list, checking to see if there are any common factors
1634 // between operands. Consider something like A*A+A*B*C+D. We would like to
1635 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1636 // To efficiently find this, we count the number of times a factor occurs
1637 // for any ADD operands that are MULs.
1638 DenseMap<Value*, unsigned> FactorOccurrences;
1639
1640 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1641 // where they are actually the same multiply.
1642 unsigned MaxOcc = 0;
1643 Value *MaxOccVal = nullptr;
1644
1645 // Prefer a non-constant factor over a constant when occurrence counts
1646 // tie. Factoring out a variable (e.g., X from X*C1 + X*C2) exposes
1647 // downstream constant folding; factoring out a constant does not.
1648 auto IsBetterFactor = [](Value *Factor, Value *MaxOccVal, unsigned Occ,
1649 unsigned MaxOcc) {
1650 return Occ > MaxOcc ||
1651 (Occ == MaxOcc &&
1653 isa<Constant>(MaxOccVal) && !isa<UndefValue>(MaxOccVal));
1654 };
1655 for (const ValueEntry &Op : Ops) {
1656 BinaryOperator *BOp =
1657 isReassociableOp(Op.Op, Instruction::Mul, Instruction::FMul);
1658 if (!BOp)
1659 continue;
1660
1661 // Compute all of the factors of this added value.
1662 SmallVector<Value*, 8> Factors;
1663 FindSingleUseMultiplyFactors(BOp, Factors);
1664 assert(Factors.size() > 1 && "Bad linearize!");
1665
1666 // Add one to FactorOccurrences for each unique factor in this op.
1667 SmallPtrSet<Value*, 8> Duplicates;
1668 for (Value *Factor : Factors) {
1669 if (!Duplicates.insert(Factor).second)
1670 continue;
1671
1672 unsigned Occ = ++FactorOccurrences[Factor];
1673 if (IsBetterFactor(Factor, MaxOccVal, Occ, MaxOcc)) {
1674 MaxOcc = Occ;
1675 MaxOccVal = Factor;
1676 }
1677
1678 // If Factor is a negative constant, add the negated value as a factor
1679 // because we can percolate the negate out. Watch for minint, which
1680 // cannot be positivified.
1681 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1682 if (CI->isNegative() && !CI->isMinValue(true)) {
1683 Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1684 if (!Duplicates.insert(Factor).second)
1685 continue;
1686 unsigned Occ = ++FactorOccurrences[Factor];
1687 if (IsBetterFactor(Factor, MaxOccVal, Occ, MaxOcc)) {
1688 MaxOcc = Occ;
1689 MaxOccVal = Factor;
1690 }
1691 }
1692 } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1693 if (CF->isNegative()) {
1694 APFloat F(CF->getValueAPF());
1695 F.changeSign();
1696 Factor = ConstantFP::get(CF->getType(), F);
1697 if (!Duplicates.insert(Factor).second)
1698 continue;
1699 unsigned Occ = ++FactorOccurrences[Factor];
1700 if (IsBetterFactor(Factor, MaxOccVal, Occ, MaxOcc)) {
1701 MaxOcc = Occ;
1702 MaxOccVal = Factor;
1703 }
1704 }
1705 }
1706 }
1707 }
1708
1709 // If any factor occurred more than one time, we can pull it out.
1710 if (MaxOcc > 1) {
1711 LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1712 << '\n');
1713 ++NumFactor;
1714
1715 // Create a new instruction that uses the MaxOccVal twice. If we don't do
1716 // this, we could otherwise run into situations where removing a factor
1717 // from an expression will drop a use of maxocc, and this can cause
1718 // RemoveFactorFromExpression on successive values to behave differently.
1719 Instruction *DummyInst =
1720 I->getType()->isIntOrIntVectorTy()
1721 ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1722 : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1723
1725 for (unsigned i = 0; i != Ops.size(); ++i) {
1726 // Only try to remove factors from expressions we're allowed to.
1727 BinaryOperator *BOp =
1728 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1729 if (!BOp)
1730 continue;
1731
1732 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal,
1733 I->getDebugLoc())) {
1734 // The factorized operand may occur several times. Convert them all in
1735 // one fell swoop.
1736 for (unsigned j = Ops.size(); j != i;) {
1737 --j;
1738 if (Ops[j].Op == Ops[i].Op) {
1739 NewMulOps.push_back(V);
1740 Ops.erase(Ops.begin()+j);
1741 }
1742 }
1743 --i;
1744 }
1745 }
1746
1747 // No need for extra uses anymore.
1748 DummyInst->deleteValue();
1749
1750 unsigned NumAddedValues = NewMulOps.size();
1751 Value *V = EmitAddTreeOfValues(I, NewMulOps);
1752
1753 // Now that we have inserted the add tree, optimize it. This allows us to
1754 // handle cases that require multiple factoring steps, such as this:
1755 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1756 assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1757 (void)NumAddedValues;
1758 if (Instruction *VI = dyn_cast<Instruction>(V))
1759 RedoInsts.insert(VI);
1760
1761 // Create the multiply.
1762 Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I->getIterator(), I);
1763 V2->setDebugLoc(I->getDebugLoc());
1764
1765 // Rerun associate on the multiply in case the inner expression turned into
1766 // a multiply. We want to make sure that we keep things in canonical form.
1767 RedoInsts.insert(V2);
1768
1769 // If every add operand included the factor (e.g. "A*B + A*C"), then the
1770 // entire result expression is just the multiply "A*(B+C)".
1771 if (Ops.empty())
1772 return V2;
1773
1774 // Otherwise, we had some input that didn't have the factor, such as
1775 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1776 // things being added by this operation.
1777 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1778 }
1779
1780 return nullptr;
1781}
1782
1783/// Build up a vector of value/power pairs factoring a product.
1784///
1785/// Given a series of multiplication operands, build a vector of factors and
1786/// the powers each is raised to when forming the final product. Sort them in
1787/// the order of descending power.
1788///
1789/// (x*x) -> [(x, 2)]
1790/// ((x*x)*x) -> [(x, 3)]
1791/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1792///
1793/// \returns Whether any factors have a power greater than one.
1795 SmallVectorImpl<Factor> &Factors) {
1796 // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1797 // Compute the sum of powers of simplifiable factors.
1798 unsigned FactorPowerSum = 0;
1799 for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1800 Value *Op = Ops[Idx-1].Op;
1801
1802 // Count the number of occurrences of this value.
1803 unsigned Count = 1;
1804 for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1805 ++Count;
1806 // Track for simplification all factors which occur 2 or more times.
1807 if (Count > 1)
1808 FactorPowerSum += Count;
1809 }
1810
1811 // We can only simplify factors if the sum of the powers of our simplifiable
1812 // factors is 4 or higher. When that is the case, we will *always* have
1813 // a simplification. This is an important invariant to prevent cyclicly
1814 // trying to simplify already minimal formations.
1815 if (FactorPowerSum < 4)
1816 return false;
1817
1818 // Now gather the simplifiable factors, removing them from Ops.
1819 FactorPowerSum = 0;
1820 for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1821 Value *Op = Ops[Idx-1].Op;
1822
1823 // Count the number of occurrences of this value.
1824 unsigned Count = 1;
1825 for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1826 ++Count;
1827 if (Count == 1)
1828 continue;
1829 // Move an even number of occurrences to Factors.
1830 Count &= ~1U;
1831 Idx -= Count;
1832 FactorPowerSum += Count;
1833 Factors.push_back(Factor(Op, Count));
1834 Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1835 }
1836
1837 // None of the adjustments above should have reduced the sum of factor powers
1838 // below our mininum of '4'.
1839 assert(FactorPowerSum >= 4);
1840
1841 llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
1842 return LHS.Power > RHS.Power;
1843 });
1844 return true;
1845}
1846
1847/// Build a tree of multiplies, computing the product of Ops.
1850 if (Ops.size() == 1)
1851 return Ops.back();
1852
1853 Value *LHS = Ops.pop_back_val();
1854 do {
1855 if (LHS->getType()->isIntOrIntVectorTy())
1856 LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1857 else
1858 LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1859 } while (!Ops.empty());
1860
1861 return LHS;
1862}
1863
1864/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1865///
1866/// Given a vector of values raised to various powers, where no two values are
1867/// equal and the powers are sorted in decreasing order, compute the minimal
1868/// DAG of multiplies to compute the final product, and return that product
1869/// value.
1870Value *
1871ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1872 SmallVectorImpl<Factor> &Factors) {
1873 assert(Factors[0].Power);
1874 SmallVector<Value *, 4> OuterProduct;
1875 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1876 Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1877 if (Factors[Idx].Power != Factors[LastIdx].Power) {
1878 LastIdx = Idx;
1879 continue;
1880 }
1881
1882 // We want to multiply across all the factors with the same power so that
1883 // we can raise them to that power as a single entity. Build a mini tree
1884 // for that.
1885 SmallVector<Value *, 4> InnerProduct;
1886 InnerProduct.push_back(Factors[LastIdx].Base);
1887 do {
1888 InnerProduct.push_back(Factors[Idx].Base);
1889 ++Idx;
1890 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1891
1892 // Reset the base value of the first factor to the new expression tree.
1893 // We'll remove all the factors with the same power in a second pass.
1894 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1895 if (Instruction *MI = dyn_cast<Instruction>(M))
1896 RedoInsts.insert(MI);
1897
1898 LastIdx = Idx;
1899 }
1900 // Unique factors with equal powers -- we've folded them into the first one's
1901 // base.
1902 Factors.erase(llvm::unique(Factors,
1903 [](const Factor &LHS, const Factor &RHS) {
1904 return LHS.Power == RHS.Power;
1905 }),
1906 Factors.end());
1907
1908 // Iteratively collect the base of each factor with an add power into the
1909 // outer product, and halve each power in preparation for squaring the
1910 // expression.
1911 for (Factor &F : Factors) {
1912 if (F.Power & 1)
1913 OuterProduct.push_back(F.Base);
1914 F.Power >>= 1;
1915 }
1916 if (Factors[0].Power) {
1917 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1918 OuterProduct.push_back(SquareRoot);
1919 OuterProduct.push_back(SquareRoot);
1920 }
1921 if (OuterProduct.size() == 1)
1922 return OuterProduct.front();
1923
1924 Value *V = buildMultiplyTree(Builder, OuterProduct);
1925 return V;
1926}
1927
1928Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
1929 SmallVectorImpl<ValueEntry> &Ops) {
1930 // We can only optimize the multiplies when there is a chain of more than
1931 // three, such that a balanced tree might require fewer total multiplies.
1932 if (Ops.size() < 4)
1933 return nullptr;
1934
1935 // Try to turn linear trees of multiplies without other uses of the
1936 // intermediate stages into minimal multiply DAGs with perfect sub-expression
1937 // re-use.
1938 SmallVector<Factor, 4> Factors;
1939 if (!collectMultiplyFactors(Ops, Factors))
1940 return nullptr; // All distinct factors, so nothing left for us to do.
1941
1942 IRBuilder<> Builder(I);
1943 // The reassociate transformation for FP operations is performed only
1944 // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1945 // to the newly generated operations.
1946 if (auto FPI = dyn_cast<FPMathOperator>(I))
1947 Builder.setFastMathFlags(FPI->getFastMathFlags());
1948
1949 Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1950 if (Ops.empty())
1951 return V;
1952
1953 ValueEntry NewEntry = ValueEntry(getRank(V), V);
1954 Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
1955 return nullptr;
1956}
1957
1958Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
1959 SmallVectorImpl<ValueEntry> &Ops) {
1960 // Now that we have the linearized expression tree, try to optimize it.
1961 // Start by folding any constants that we found.
1962 const DataLayout &DL = I->getDataLayout();
1963 Constant *Cst = nullptr;
1964 unsigned Opcode = I->getOpcode();
1965 while (!Ops.empty()) {
1966 if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
1967 if (!Cst) {
1968 Ops.pop_back();
1969 Cst = C;
1970 continue;
1971 }
1972 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
1973 Ops.pop_back();
1974 Cst = Res;
1975 continue;
1976 }
1977 }
1978 break;
1979 }
1980 // If there was nothing but constants then we are done.
1981 if (Ops.empty())
1982 return Cst;
1983
1984 // Put the combined constant back at the end of the operand list, except if
1985 // there is no point. For example, an add of 0 gets dropped here, while a
1986 // multiplication by zero turns the whole expression into zero.
1987 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
1988 if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
1989 return Cst;
1990 Ops.push_back(ValueEntry(0, Cst));
1991 }
1992
1993 if (Ops.size() == 1) return Ops[0].Op;
1994
1995 // Handle destructive annihilation due to identities between elements in the
1996 // argument list here.
1997 unsigned NumOps = Ops.size();
1998 switch (Opcode) {
1999 default: break;
2000 case Instruction::And:
2001 case Instruction::Or:
2002 if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
2003 return Result;
2004 break;
2005
2006 case Instruction::Xor:
2007 if (Value *Result = OptimizeXor(I, Ops))
2008 return Result;
2009 break;
2010
2011 case Instruction::Add:
2012 case Instruction::FAdd:
2013 if (Value *Result = OptimizeAdd(I, Ops))
2014 return Result;
2015 break;
2016
2017 case Instruction::Mul:
2018 case Instruction::FMul:
2019 if (Value *Result = OptimizeMul(I, Ops))
2020 return Result;
2021 break;
2022 }
2023
2024 if (Ops.size() != NumOps)
2025 return OptimizeExpression(I, Ops);
2026 return nullptr;
2027}
2028
2029// Remove dead instructions and if any operands are trivially dead add them to
2030// Insts so they will be removed as well.
2031void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
2032 OrderedSet &Insts) {
2033 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2034 SmallVector<Value *, 4> Ops(I->operands());
2035 ValueRankMap.erase(I);
2036 Insts.remove(I);
2037 RedoInsts.remove(I);
2039 I->eraseFromParent();
2040 for (auto *Op : Ops)
2041 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
2042 if (OpInst->use_empty())
2043 Insts.insert(OpInst);
2044}
2045
2046/// Zap the given instruction, adding interesting operands to the work list.
2047void ReassociatePass::EraseInst(Instruction *I) {
2048 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2049 LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
2050
2051 SmallVector<Value *, 8> Ops(I->operands());
2052 // Erase the dead instruction.
2053 ValueRankMap.erase(I);
2054 RedoInsts.remove(I);
2056 I->eraseFromParent();
2057 // Optimize its operands.
2058 SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
2059 for (Value *V : Ops)
2060 if (Instruction *Op = dyn_cast<Instruction>(V)) {
2061 // If this is a node in an expression tree, climb to the expression root
2062 // and add that since that's where optimization actually happens.
2063 unsigned Opcode = Op->getOpcode();
2064 while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
2065 Visited.insert(Op).second)
2066 Op = Op->user_back();
2067
2068 // The instruction we're going to push may be coming from a
2069 // dead block, and Reassociate skips the processing of unreachable
2070 // blocks because it's a waste of time and also because it can
2071 // lead to infinite loop due to LLVM's non-standard definition
2072 // of dominance.
2073 if (ValueRankMap.contains(Op))
2074 RedoInsts.insert(Op);
2075 }
2076
2077 MadeChange = true;
2078}
2079
2080/// Recursively analyze an expression to build a list of instructions that have
2081/// negative floating-point constant operands. The caller can then transform
2082/// the list to create positive constants for better reassociation and CSE.
2084 SmallVectorImpl<Instruction *> &Candidates) {
2085 // Handle only one-use instructions. Combining negations does not justify
2086 // replicating instructions.
2087 Instruction *I;
2088 if (!match(V, m_OneUse(m_Instruction(I))))
2089 return;
2090
2091 // Handle expressions of multiplications and divisions.
2092 // TODO: This could look through floating-point casts.
2093 const APFloat *C;
2094 switch (I->getOpcode()) {
2095 case Instruction::FMul:
2096 // Not expecting non-canonical code here. Bail out and wait.
2097 if (match(I->getOperand(0), m_Constant()))
2098 break;
2099
2100 if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2101 Candidates.push_back(I);
2102 LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2103 }
2104 getNegatibleInsts(I->getOperand(0), Candidates);
2105 getNegatibleInsts(I->getOperand(1), Candidates);
2106 break;
2107 case Instruction::FDiv:
2108 // Not expecting non-canonical code here. Bail out and wait.
2109 if (match(I->getOperand(0), m_Constant()) &&
2110 match(I->getOperand(1), m_Constant()))
2111 break;
2112
2113 if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2114 (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2115 Candidates.push_back(I);
2116 LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2117 }
2118 getNegatibleInsts(I->getOperand(0), Candidates);
2119 getNegatibleInsts(I->getOperand(1), Candidates);
2120 break;
2121 default:
2122 break;
2123 }
2124}
2125
2126/// Given an fadd/fsub with an operand that is a one-use instruction
2127/// (the fadd/fsub), try to change negative floating-point constants into
2128/// positive constants to increase potential for reassociation and CSE.
2129Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
2130 Instruction *Op,
2131 Value *OtherOp) {
2132 assert((I->getOpcode() == Instruction::FAdd ||
2133 I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2134
2135 // Collect instructions with negative FP constants from the subtree that ends
2136 // in Op.
2137 SmallVector<Instruction *, 4> Candidates;
2138 getNegatibleInsts(Op, Candidates);
2139 if (Candidates.empty())
2140 return nullptr;
2141
2142 // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
2143 // resulting subtract will be broken up later. This can get us into an
2144 // infinite loop during reassociation.
2145 bool IsFSub = I->getOpcode() == Instruction::FSub;
2146 bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
2147 if (NeedsSubtract && ShouldBreakUpSubtract(I))
2148 return nullptr;
2149
2150 for (Instruction *Negatible : Candidates) {
2151 const APFloat *C;
2152 if (match(Negatible->getOperand(0), m_APFloat(C))) {
2153 assert(!match(Negatible->getOperand(1), m_Constant()) &&
2154 "Expecting only 1 constant operand");
2155 assert(C->isNegative() && "Expected negative FP constant");
2156 Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
2157 MadeChange = true;
2158 }
2159 if (match(Negatible->getOperand(1), m_APFloat(C))) {
2160 assert(!match(Negatible->getOperand(0), m_Constant()) &&
2161 "Expecting only 1 constant operand");
2162 assert(C->isNegative() && "Expected negative FP constant");
2163 Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
2164 MadeChange = true;
2165 }
2166 }
2167 assert(MadeChange == true && "Negative constant candidate was not changed");
2168
2169 // Negations cancelled out.
2170 if (Candidates.size() % 2 == 0)
2171 return I;
2172
2173 // Negate the final operand in the expression by flipping the opcode of this
2174 // fadd/fsub.
2175 assert(Candidates.size() % 2 == 1 && "Expected odd number");
2176 IRBuilder<> Builder(I);
2177 Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
2178 : Builder.CreateFSubFMF(OtherOp, Op, I);
2179 I->replaceAllUsesWith(NewInst);
2180 RedoInsts.insert(I);
2181 return dyn_cast<Instruction>(NewInst);
2182}
2183
2184/// Canonicalize expressions that contain a negative floating-point constant
2185/// of the following form:
2186/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2187/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2188/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2189///
2190/// The fadd/fsub opcode may be switched to allow folding a negation into the
2191/// input instruction.
2192Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
2193 LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
2194 Value *X;
2195 Instruction *Op;
2197 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2198 I = R;
2200 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2201 I = R;
2203 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2204 I = R;
2205 return I;
2206}
2207
2208/// Inspect and optimize the given instruction. Note that erasing
2209/// instructions is not allowed.
2210void ReassociatePass::OptimizeInst(Instruction *I) {
2211 // Only consider operations that we understand.
2213 return;
2214
2215 if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
2216 // If an operand of this shift is a reassociable multiply, or if the shift
2217 // is used by a reassociable multiply or add, turn into a multiply.
2218 if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
2219 (I->hasOneUse() &&
2220 (isReassociableOp(I->user_back(), Instruction::Mul) ||
2221 isReassociableOp(I->user_back(), Instruction::Add)))) {
2223 RedoInsts.insert(I);
2224 MadeChange = true;
2225 I = NI;
2226 }
2227
2228 // Commute binary operators, to canonicalize the order of their operands.
2229 // This can potentially expose more CSE opportunities, and makes writing other
2230 // transformations simpler.
2231 if (I->isCommutative())
2232 canonicalizeOperands(I);
2233
2234 // Canonicalize negative constants out of expressions.
2235 if (Instruction *Res = canonicalizeNegFPConstants(I))
2236 I = Res;
2237
2238 // Don't optimize floating-point instructions unless they have the
2239 // appropriate FastMathFlags for reassociation enabled.
2241 return;
2242
2243 // Do not reassociate boolean (i1/vXi1) expressions. We want to preserve the
2244 // original order of evaluation for short-circuited comparisons that
2245 // SimplifyCFG has folded to AND/OR expressions. If the expression
2246 // is not further optimized, it is likely to be transformed back to a
2247 // short-circuited form for code gen, and the source order may have been
2248 // optimized for the most likely conditions. For vector boolean expressions,
2249 // we should be optimizing for ILP and not serializing the logical operations.
2250 if (I->getType()->isIntOrIntVectorTy(1))
2251 return;
2252
2253 // If this is a bitwise or instruction of operands
2254 // with no common bits set, convert it to X+Y.
2255 if (I->getOpcode() == Instruction::Or &&
2257 (cast<PossiblyDisjointInst>(I)->isDisjoint() ||
2258 haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
2259 SimplifyQuery(I->getDataLayout(),
2260 /*DT=*/nullptr, /*AC=*/nullptr, I)))) {
2262 RedoInsts.insert(I);
2263 MadeChange = true;
2264 I = NI;
2265 }
2266
2267 if (I->getOpcode() == Instruction::Mul && ShouldBreakUpDistribution(I)) {
2268 Instruction *MulUser = cast<Instruction>(I->user_back());
2269 Instruction *NI = BreakUpDistribute(I, RedoInsts);
2270 RedoInsts.insert(I);
2271 RedoInsts.insert(MulUser);
2272 MadeChange = true;
2273 I = NI;
2274 }
2275
2276 // If this is a subtract instruction which is not already in negate form,
2277 // see if we can convert it to X+-Y.
2278 if (I->getOpcode() == Instruction::Sub) {
2279 if (ShouldBreakUpSubtract(I)) {
2280 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2281 RedoInsts.insert(I);
2282 MadeChange = true;
2283 I = NI;
2284 } else if (match(I, m_Neg(m_Value()))) {
2285 // Otherwise, this is a negation. See if the operand is a multiply tree
2286 // and if this is not an inner node of a multiply tree.
2287 if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2288 (!I->hasOneUse() ||
2289 !isReassociableOp(I->user_back(), Instruction::Mul))) {
2291 // If the negate was simplified, revisit the users to see if we can
2292 // reassociate further.
2293 for (User *U : NI->users()) {
2294 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2295 RedoInsts.insert(Tmp);
2296 }
2297 RedoInsts.insert(I);
2298 MadeChange = true;
2299 I = NI;
2300 }
2301 }
2302 } else if (I->getOpcode() == Instruction::FNeg ||
2303 I->getOpcode() == Instruction::FSub) {
2304 if (ShouldBreakUpSubtract(I)) {
2305 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2306 RedoInsts.insert(I);
2307 MadeChange = true;
2308 I = NI;
2309 } else if (match(I, m_FNeg(m_Value()))) {
2310 // Otherwise, this is a negation. See if the operand is a multiply tree
2311 // and if this is not an inner node of a multiply tree.
2312 Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
2313 I->getOperand(0);
2314 if (isReassociableOp(Op, Instruction::FMul) &&
2315 (!I->hasOneUse() ||
2316 !isReassociableOp(I->user_back(), Instruction::FMul))) {
2317 // If the negate was simplified, revisit the users to see if we can
2318 // reassociate further.
2320 for (User *U : NI->users()) {
2321 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2322 RedoInsts.insert(Tmp);
2323 }
2324 RedoInsts.insert(I);
2325 MadeChange = true;
2326 I = NI;
2327 }
2328 }
2329 }
2330
2331 // If this instruction is an associative binary operator, process it.
2332 if (!I->isAssociative()) return;
2333 BinaryOperator *BO = cast<BinaryOperator>(I);
2334
2335 // If this is an interior node of a reassociable tree, ignore it until we
2336 // get to the root of the tree, to avoid N^2 analysis.
2337 unsigned Opcode = BO->getOpcode();
2338 if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2339 // During the initial run we will get to the root of the tree.
2340 // But if we get here while we are redoing instructions, there is no
2341 // guarantee that the root will be visited. So Redo later
2342 if (BO->user_back() != BO &&
2343 BO->getParent() == BO->user_back()->getParent())
2344 RedoInsts.insert(BO->user_back());
2345 return;
2346 }
2347
2348 // If this is an add tree that is used by a sub instruction, ignore it
2349 // until we process the subtract.
2350 if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2351 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2352 return;
2353 if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2354 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2355 return;
2356
2357 ReassociateExpression(BO);
2358}
2359
2360void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2361 // First, walk the expression tree, linearizing the tree, collecting the
2362 // operand information.
2364 OverflowTracking Flags;
2365 MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, Flags);
2367 Ops.reserve(Tree.size());
2368 for (const RepeatedValue &E : Tree)
2369 Ops.append(E.second, ValueEntry(getRank(E.first), E.first));
2370
2371 LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2372
2373 // Now that we have linearized the tree to a list and have gathered all of
2374 // the operands and their ranks, sort the operands by their rank. Use a
2375 // stable_sort so that values with equal ranks will have their relative
2376 // positions maintained (and so the compiler is deterministic). Note that
2377 // this sorts so that the highest ranking values end up at the beginning of
2378 // the vector.
2380
2381 // Now that we have the expression tree in a convenient
2382 // sorted form, optimize it globally if possible.
2383 if (Value *V = OptimizeExpression(I, Ops)) {
2384 if (V == I)
2385 // Self-referential expression in unreachable code.
2386 return;
2387 // This expression tree simplified to something that isn't a tree,
2388 // eliminate it.
2389 LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
2390 I->replaceAllUsesWith(V);
2391 if (Instruction *VI = dyn_cast<Instruction>(V))
2392 if (I->getDebugLoc())
2393 VI->setDebugLoc(I->getDebugLoc());
2394 RedoInsts.insert(I);
2395 ++NumAnnihil;
2396 return;
2397 }
2398
2399 // We want to sink immediates as deeply as possible except in the case where
2400 // this is a multiply tree used only by an add, and the immediate is a -1.
2401 // In this case we reassociate to put the negation on the outside so that we
2402 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2403 if (I->hasOneUse()) {
2404 if (I->getOpcode() == Instruction::Mul &&
2405 cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2406 isa<ConstantInt>(Ops.back().Op) &&
2407 cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
2408 ValueEntry Tmp = Ops.pop_back_val();
2409 Ops.insert(Ops.begin(), Tmp);
2410 } else if (I->getOpcode() == Instruction::FMul &&
2411 cast<Instruction>(I->user_back())->getOpcode() ==
2412 Instruction::FAdd &&
2413 isa<ConstantFP>(Ops.back().Op) &&
2414 cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
2415 ValueEntry Tmp = Ops.pop_back_val();
2416 Ops.insert(Ops.begin(), Tmp);
2417 }
2418 }
2419
2420 LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2421
2422 if (Ops.size() == 1) {
2423 if (Ops[0].Op == I)
2424 // Self-referential expression in unreachable code.
2425 return;
2426
2427 // This expression tree simplified to something that isn't a tree,
2428 // eliminate it.
2429 I->replaceAllUsesWith(Ops[0].Op);
2430 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2431 OI->setDebugLoc(I->getDebugLoc());
2432 RedoInsts.insert(I);
2433 return;
2434 }
2435
2436 if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2437 // Find the pair with the highest count in the pairmap and move it to the
2438 // back of the list so that it can later be CSE'd.
2439 // example:
2440 // a*b*c*d*e
2441 // if c*e is the most "popular" pair, we can express this as
2442 // (((c*e)*d)*b)*a
2443 unsigned Max = 1;
2444 unsigned BestRank = 0;
2445 std::pair<unsigned, unsigned> BestPair;
2446 unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2447 unsigned LimitIdx = 0;
2448 // With the CSE-driven heuristic, we are about to slap two values at the
2449 // beginning of the expression whereas they could live very late in the CFG.
2450 // When using the CSE-local heuristic we avoid creating dependences from
2451 // completely unrelated part of the CFG by limiting the expression
2452 // reordering on the values that live in the first seen basic block.
2453 // The main idea is that we want to avoid forming expressions that would
2454 // become loop dependent.
2455 if (UseCSELocalOpt) {
2456 const BasicBlock *FirstSeenBB = nullptr;
2457 int StartIdx = Ops.size() - 1;
2458 // Skip the first value of the expression since we need at least two
2459 // values to materialize an expression. I.e., even if this value is
2460 // anchored in a different basic block, the actual first sub expression
2461 // will be anchored on the second value.
2462 for (int i = StartIdx - 1; i != -1; --i) {
2463 const Value *Val = Ops[i].Op;
2464 const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
2465 const BasicBlock *SeenBB = nullptr;
2466 if (!CurrLeafInstr) {
2467 // The value is free of any CFG dependencies.
2468 // Do as if it lives in the entry block.
2469 //
2470 // We do this to make sure all the values falling on this path are
2471 // seen through the same anchor point. The rationale is these values
2472 // can be combined together to from a sub expression free of any CFG
2473 // dependencies so we want them to stay together.
2474 // We could be cleverer and postpone the anchor down to the first
2475 // anchored value, but that's likely complicated to get right.
2476 // E.g., we wouldn't want to do that if that means being stuck in a
2477 // loop.
2478 //
2479 // For instance, we wouldn't want to change:
2480 // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
2481 // into
2482 // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
2483 // Because all the sub expressions with arg2..N would be stuck between
2484 // two loop dependent values.
2485 SeenBB = &I->getParent()->getParent()->getEntryBlock();
2486 } else {
2487 SeenBB = CurrLeafInstr->getParent();
2488 }
2489
2490 if (!FirstSeenBB) {
2491 FirstSeenBB = SeenBB;
2492 continue;
2493 }
2494 if (FirstSeenBB != SeenBB) {
2495 // ith value is in a different basic block.
2496 // Rewind the index once to point to the last value on the same basic
2497 // block.
2498 LimitIdx = i + 1;
2499 LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
2500 << LimitIdx << ", " << StartIdx << "]\n");
2501 break;
2502 }
2503 }
2504 }
2505 for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
2506 // We must use int type to go below zero when LimitIdx is 0.
2507 for (int j = i - 1; j >= (int)LimitIdx; --j) {
2508 unsigned Score = 0;
2509 Value *Op0 = Ops[i].Op;
2510 Value *Op1 = Ops[j].Op;
2511 if (std::less<Value *>()(Op1, Op0))
2512 std::swap(Op0, Op1);
2513 auto it = PairMap[Idx].find({Op0, Op1});
2514 if (it != PairMap[Idx].end()) {
2515 // Functions like BreakUpSubtract() can erase the Values we're using
2516 // as keys and create new Values after we built the PairMap. There's a
2517 // small chance that the new nodes can have the same address as
2518 // something already in the table. We shouldn't accumulate the stored
2519 // score in that case as it refers to the wrong Value.
2520 if (it->second.isValid())
2521 Score += it->second.Score;
2522 }
2523
2524 unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
2525
2526 // By construction, the operands are sorted in reverse order of their
2527 // topological order.
2528 // So we tend to form (sub) expressions with values that are close to
2529 // each other.
2530 //
2531 // Now to expose more CSE opportunities we want to expose the pair of
2532 // operands that occur the most (as statically computed in
2533 // BuildPairMap.) as the first sub-expression.
2534 //
2535 // If two pairs occur as many times, we pick the one with the
2536 // lowest rank, meaning the one with both operands appearing first in
2537 // the topological order.
2538 if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2539 BestPair = {j, i};
2540 Max = Score;
2541 BestRank = MaxRank;
2542 }
2543 }
2544 }
2545 if (Max > 1) {
2546 auto Op0 = Ops[BestPair.first];
2547 auto Op1 = Ops[BestPair.second];
2548 Ops.erase(&Ops[BestPair.second]);
2549 Ops.erase(&Ops[BestPair.first]);
2550 Ops.push_back(Op0);
2551 Ops.push_back(Op1);
2552 }
2553 }
2554 LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
2555 dbgs() << '\n');
2556 // Now that we ordered and optimized the expressions, splat them back into
2557 // the expression tree, removing any unneeded nodes.
2558 RewriteExprTree(I, Ops, Flags);
2559}
2560
2561void
2562ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2563 // Make a "pairmap" of how often each operand pair occurs.
2564 for (BasicBlock *BI : RPOT) {
2565 for (Instruction &I : *BI) {
2566 if (!I.isAssociative() || !I.isBinaryOp())
2567 continue;
2568
2569 // Ignore nodes that aren't at the root of trees.
2570 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2571 continue;
2572
2573 // Collect all operands in a single reassociable expression.
2574 // Since Reassociate has already been run once, we can assume things
2575 // are already canonical according to Reassociation's regime.
2576 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
2577 SmallVector<Value *, 8> Ops;
2578 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2579 Value *Op = Worklist.pop_back_val();
2581 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2582 Ops.push_back(Op);
2583 continue;
2584 }
2585 // Be paranoid about self-referencing expressions in unreachable code.
2586 if (OpI->getOperand(0) != OpI)
2587 Worklist.push_back(OpI->getOperand(0));
2588 if (OpI->getOperand(1) != OpI)
2589 Worklist.push_back(OpI->getOperand(1));
2590 }
2591 // Skip extremely long expressions.
2592 if (Ops.size() > GlobalReassociateLimit)
2593 continue;
2594
2595 // Add all pairwise combinations of operands to the pair map.
2596 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2597 SmallSet<std::pair<Value *, Value*>, 32> Visited;
2598 for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2599 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2600 // Canonicalize operand orderings.
2601 Value *Op0 = Ops[i];
2602 Value *Op1 = Ops[j];
2603 if (std::less<Value *>()(Op1, Op0))
2604 std::swap(Op0, Op1);
2605 if (!Visited.insert({Op0, Op1}).second)
2606 continue;
2607 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2608 if (!res.second) {
2609 // If either key value has been erased then we've got the same
2610 // address by coincidence. That can't happen here because nothing is
2611 // erasing values but it can happen by the time we're querying the
2612 // map.
2613 assert(res.first->second.isValid() && "WeakVH invalidated");
2614 ++res.first->second.Score;
2615 }
2616 }
2617 }
2618 }
2619 }
2620}
2621
2623 // Get the functions basic blocks in Reverse Post Order. This order is used by
2624 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2625 // blocks (it has been seen that the analysis in this pass could hang when
2626 // analysing dead basic blocks).
2628
2629 // Calculate the rank map for F.
2630 BuildRankMap(F, RPOT);
2631
2632 // Build the pair map before running reassociate.
2633 // Technically this would be more accurate if we did it after one round
2634 // of reassociation, but in practice it doesn't seem to help much on
2635 // real-world code, so don't waste the compile time running reassociate
2636 // twice.
2637 // If a user wants, they could expicitly run reassociate twice in their
2638 // pass pipeline for further potential gains.
2639 // It might also be possible to update the pair map during runtime, but the
2640 // overhead of that may be large if there's many reassociable chains.
2641 BuildPairMap(RPOT);
2642
2643 MadeChange = false;
2644
2645 // Traverse the same blocks that were analysed by BuildRankMap.
2646 for (BasicBlock *BI : RPOT) {
2647 assert(RankMap.count(&*BI) && "BB should be ranked.");
2648 // Optimize every instruction in the basic block.
2649 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2651 EraseInst(&*II++);
2652 } else {
2653 OptimizeInst(&*II);
2654 assert(II->getParent() == &*BI && "Moved to a different block!");
2655 ++II;
2656 }
2657
2658 // Make a copy of all the instructions to be redone so we can remove dead
2659 // instructions.
2660 OrderedSet ToRedo(RedoInsts);
2661 // Iterate over all instructions to be reevaluated and remove trivially dead
2662 // instructions. If any operand of the trivially dead instruction becomes
2663 // dead mark it for deletion as well. Continue this process until all
2664 // trivially dead instructions have been removed.
2665 while (!ToRedo.empty()) {
2666 Instruction *I = ToRedo.pop_back_val();
2668 RecursivelyEraseDeadInsts(I, ToRedo);
2669 MadeChange = true;
2670 }
2671 }
2672
2673 // Now that we have removed dead instructions, we can reoptimize the
2674 // remaining instructions.
2675 while (!RedoInsts.empty()) {
2676 Instruction *I = RedoInsts.front();
2677 RedoInsts.erase(RedoInsts.begin());
2679 EraseInst(I);
2680 else
2681 OptimizeInst(I);
2682 }
2683 }
2684
2685 // We are done with the rank map and pair map.
2686 RankMap.clear();
2687 ValueRankMap.clear();
2688 for (auto &Entry : PairMap)
2689 Entry.clear();
2690
2691 if (MadeChange) {
2694 return PA;
2695 }
2696
2697 return PreservedAnalyses::all();
2698}
2699
2700namespace {
2701
2702class ReassociateLegacyPass : public FunctionPass {
2703 ReassociatePass Impl;
2704
2705public:
2706 static char ID; // Pass identification, replacement for typeid
2707
2708 ReassociateLegacyPass() : FunctionPass(ID) {
2710 }
2711
2712 bool runOnFunction(Function &F) override {
2713 if (skipFunction(F))
2714 return false;
2715
2716 FunctionAnalysisManager DummyFAM;
2717 auto PA = Impl.run(F, DummyFAM);
2718 return !PA.areAllPreserved();
2719 }
2720
2721 void getAnalysisUsage(AnalysisUsage &AU) const override {
2722 AU.setPreservesCFG();
2723 AU.addPreserved<AAResultsWrapperPass>();
2724 AU.addPreserved<BasicAAWrapperPass>();
2725 AU.addPreserved<GlobalsAAWrapperPass>();
2726 }
2727};
2728
2729} // end anonymous namespace
2730
2731char ReassociateLegacyPass::ID = 0;
2732
2733INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2734 "Reassociate expressions", false, false)
2735
2736// Public interface to the Reassociate pass
2738 return new ReassociateLegacyPass();
2739}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition IVUsers.cpp:56
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isReassociableOp(Instruction *I, unsigned IntOpcode, unsigned FPOpcode)
Definition LICM.cpp:2735
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops, ReassociatePass::OrderedSet &ToRedo, OverflowTracking &Flags)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
std::pair< Value *, uint64_t > RepeatedValue
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
static bool ShouldBreakUpDistribution(Instruction *Mul)
Return true if Mul is of the form (X+Y)*C or (X-Y)*C where C is a constant, and there exists a siblin...
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static BinaryOperator * BreakUpDistribute(Instruction *Mul, ReassociatePass::OrderedSet &ToRedo)
Distribute Mul of the form (X+Y)*C into X*C + Y*C.
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
static cl::opt< bool > UseCSELocalOpt(DEBUG_TYPE "-use-cse-local", cl::desc("Only reorder expressions within a basic block " "when exposing CSE opportunities"), cl::init(true), cl::Hidden)
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
static Instruction * CreateNeg(Value *S1, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static bool hasFPAssociativeFlags(Instruction *I)
Return true if I is an instruction with the FastMathFlags that are needed for general reassociation s...
static Value * createAndInstr(BasicBlock::iterator InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
static bool isLoadCombineCandidate(Instruction *Or)
static Value * EmitAddTreeOfValues(Instruction *I, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:275
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition InstrTypes.h:409
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static LLVM_ABI Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty, bool AllowLHSConstant=false)
Return the absorbing element for the given binary operation, i.e.
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
This provides a helper for copying FMF from an instruction or setting specified flags.
Definition IRBuilder.h:93
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateFSubFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Definition IRBuilder.h:1712
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition IRBuilder.h:352
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Definition IRBuilder.h:1693
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Reassociate commutative expressions.
Definition Reassociate.h:74
DenseMap< BasicBlock *, unsigned > RankMap
Definition Reassociate.h:80
DenseMap< AssertingVH< Value >, unsigned > ValueRankMap
Definition Reassociate.h:81
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
SetVector< AssertingVH< Instruction >, std::deque< AssertingVH< Instruction > > > OrderedSet
Definition Reassociate.h:76
DenseMap< std::pair< Value *, Value * >, PairMapValue > PairMap[NumBinaryOps]
Definition Reassociate.h:95
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:156
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Use & Op()
Definition User.h:171
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI void deleteValue()
Delete a pointer to a generic Value.
Definition Value.cpp:108
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Utility class representing a non-constant Xor-operand.
Value * getSymbolicPart() const
unsigned getSymbolicRank() const
void setSymbolicRank(unsigned R)
const APInt & getConstPart() const
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
constexpr double e
A private "module" namespace for types and utilities used by Reassociate.
Definition Reassociate.h:47
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2115
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1690
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition APFloat.h:1666
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2133
unsigned M1(unsigned Val)
Definition VE.h:377
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI void initializeReassociateLegacyPassPass(PassRegistry &)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI FunctionPass * createReassociatePass()
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2051
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
Utility class representing a base and exponent pair which form one factor of some product.
Definition Reassociate.h:62