// LLVM 19.0.0git — Reassociate.cpp (source listing extracted from doxygen).
//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates commutative expressions in an order that is designed
// to promote better constant propagation, GCSE, LICM, PRE, etc.
//
// For example: 4 + (x + 5) -> x + (4 + 5)
//
// In the implementation of this algorithm, constants are assigned rank = 0,
// function arguments are rank = 1, and other values are assigned ranks
// corresponding to the reverse post order traversal of current function
// (starting at 2), which effectively gives values in deep loops higher rank
// than values not in loops.
//
//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Operator.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/IR/ValueHandle.h"
53#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <utility>
63
64using namespace llvm;
65using namespace reassociate;
66using namespace PatternMatch;
67
68#define DEBUG_TYPE "reassociate"
69
70STATISTIC(NumChanged, "Number of insts reassociated");
71STATISTIC(NumAnnihil, "Number of expr tree annihilated");
72STATISTIC(NumFactor , "Number of multiplies factored");
73
74static cl::opt<bool>
75 UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
76 cl::desc("Only reorder expressions within a basic block "
77 "when exposing CSE opportunities"),
78 cl::init(true), cl::Hidden);
79
80#ifndef NDEBUG
81/// Print out the expression identified in the Ops list.
83 Module *M = I->getModule();
84 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
85 << *Ops[0].Op->getType() << '\t';
86 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
87 dbgs() << "[ ";
88 Ops[i].Op->printAsOperand(dbgs(), false, M);
89 dbgs() << ", #" << Ops[i].Rank << "] ";
90 }
91}
92#endif
93
94/// Utility class representing a non-constant Xor-operand. We classify
95/// non-constant Xor-Operands into two categories:
96/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
97/// C2)
98/// C2.1) The operand is in the form of "X | C", where C is a non-zero
99/// constant.
100/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
101/// operand as "E | 0"
103public:
104 XorOpnd(Value *V);
105
106 bool isInvalid() const { return SymbolicPart == nullptr; }
107 bool isOrExpr() const { return isOr; }
108 Value *getValue() const { return OrigVal; }
109 Value *getSymbolicPart() const { return SymbolicPart; }
110 unsigned getSymbolicRank() const { return SymbolicRank; }
111 const APInt &getConstPart() const { return ConstPart; }
112
113 void Invalidate() { SymbolicPart = OrigVal = nullptr; }
114 void setSymbolicRank(unsigned R) { SymbolicRank = R; }
115
116private:
117 Value *OrigVal;
118 Value *SymbolicPart;
119 APInt ConstPart;
120 unsigned SymbolicRank;
121 bool isOr;
122};
123
125 assert(!isa<ConstantInt>(V) && "No ConstantInt");
126 OrigVal = V;
127 Instruction *I = dyn_cast<Instruction>(V);
128 SymbolicRank = 0;
129
130 if (I && (I->getOpcode() == Instruction::Or ||
131 I->getOpcode() == Instruction::And)) {
132 Value *V0 = I->getOperand(0);
133 Value *V1 = I->getOperand(1);
134 const APInt *C;
135 if (match(V0, m_APInt(C)))
136 std::swap(V0, V1);
137
138 if (match(V1, m_APInt(C))) {
139 ConstPart = *C;
140 SymbolicPart = V0;
141 isOr = (I->getOpcode() == Instruction::Or);
142 return;
143 }
144 }
145
146 // view the operand as "V | 0"
147 SymbolicPart = V;
148 ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
149 isOr = true;
150}
151
152/// Return true if I is an instruction with the FastMathFlags that are needed
153/// for general reassociation set. This is not the same as testing
154/// Instruction::isAssociative() because it includes operations like fsub.
155/// (This routine is only intended to be called for floating-point operations.)
157 assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
158 return I->hasAllowReassoc() && I->hasNoSignedZeros();
159}
160
161/// Return true if V is an instruction of the specified opcode and if it
162/// only has one use.
163static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
164 auto *BO = dyn_cast<BinaryOperator>(V);
165 if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
166 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
167 return BO;
168 return nullptr;
169}
170
171static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
172 unsigned Opcode2) {
173 auto *BO = dyn_cast<BinaryOperator>(V);
174 if (BO && BO->hasOneUse() &&
175 (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
176 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
177 return BO;
178 return nullptr;
179}
180
181void ReassociatePass::BuildRankMap(Function &F,
183 unsigned Rank = 2;
184
185 // Assign distinct ranks to function arguments.
186 for (auto &Arg : F.args()) {
187 ValueRankMap[&Arg] = ++Rank;
188 LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
189 << "\n");
190 }
191
192 // Traverse basic blocks in ReversePostOrder.
193 for (BasicBlock *BB : RPOT) {
194 unsigned BBRank = RankMap[BB] = ++Rank << 16;
195
196 // Walk the basic block, adding precomputed ranks for any instructions that
197 // we cannot move. This ensures that the ranks for these instructions are
198 // all different in the block.
199 for (Instruction &I : *BB)
201 ValueRankMap[&I] = ++BBRank;
202 }
203}
204
205unsigned ReassociatePass::getRank(Value *V) {
206 Instruction *I = dyn_cast<Instruction>(V);
207 if (!I) {
208 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
209 return 0; // Otherwise it's a global or constant, rank 0.
210 }
211
212 if (unsigned Rank = ValueRankMap[I])
213 return Rank; // Rank already known?
214
215 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
216 // we can reassociate expressions for code motion! Since we do not recurse
217 // for PHI nodes, we cannot have infinite recursion here, because there
218 // cannot be loops in the value graph that do not go through PHI nodes.
219 unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
220 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
221 Rank = std::max(Rank, getRank(I->getOperand(i)));
222
223 // If this is a 'not' or 'neg' instruction, do not count it for rank. This
224 // assures us that X and ~X will have the same rank.
225 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
226 !match(I, m_FNeg(m_Value())))
227 ++Rank;
228
229 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
230 << "\n");
231
232 return ValueRankMap[I] = Rank;
233}
234
235// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
236void ReassociatePass::canonicalizeOperands(Instruction *I) {
237 assert(isa<BinaryOperator>(I) && "Expected binary operator.");
238 assert(I->isCommutative() && "Expected commutative operator.");
239
240 Value *LHS = I->getOperand(0);
241 Value *RHS = I->getOperand(1);
242 if (LHS == RHS || isa<Constant>(RHS))
243 return;
244 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
245 cast<BinaryOperator>(I)->swapOperands();
246}
247
249 BasicBlock::iterator InsertBefore,
250 Value *FlagsOp) {
251 if (S1->getType()->isIntOrIntVectorTy())
252 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
253 else {
254 BinaryOperator *Res =
255 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
256 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
257 return Res;
258 }
259}
260
262 BasicBlock::iterator InsertBefore,
263 Value *FlagsOp) {
264 if (S1->getType()->isIntOrIntVectorTy())
265 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
266 else {
267 BinaryOperator *Res =
268 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
269 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
270 return Res;
271 }
272}
273
275 BasicBlock::iterator InsertBefore,
276 Value *FlagsOp) {
277 if (S1->getType()->isIntOrIntVectorTy())
278 return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
279
280 if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
281 return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
282
283 return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
284}
285
286/// Replace 0-X with X*-1.
288 assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
289 "Expected a Negate!");
290 // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
291 unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
292 Type *Ty = Neg->getType();
293 Constant *NegOne = Ty->isIntOrIntVectorTy() ?
294 ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
295
296 BinaryOperator *Res =
297 CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg->getIterator(), Neg);
298 Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
299 Res->takeName(Neg);
300 Neg->replaceAllUsesWith(Res);
301 Res->setDebugLoc(Neg->getDebugLoc());
302 return Res;
303}
304
305using RepeatedValue = std::pair<Value *, uint64_t>;
306
307/// Given an associative binary expression, return the leaf
308/// nodes in Ops along with their weights (how many times the leaf occurs). The
309/// original expression is the same as
310/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
311/// op
312/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
313/// op
314/// ...
315/// op
316/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
317///
318/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
319///
320/// This routine may modify the function, in which case it returns 'true'. The
321/// changes it makes may well be destructive, changing the value computed by 'I'
322/// to something completely different. Thus if the routine returns 'true' then
323/// you MUST either replace I with a new expression computed from the Ops array,
324/// or use RewriteExprTree to put the values back in.
325///
326/// A leaf node is either not a binary operation of the same kind as the root
327/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
328/// opcode), or is the same kind of binary operator but has a use which either
329/// does not belong to the expression, or does belong to the expression but is
330/// a leaf node. Every leaf node has at least one use that is a non-leaf node
331/// of the expression, while for non-leaf nodes (except for the root 'I') every
332/// use is a non-leaf node of the expression.
333///
334/// For example:
335/// expression graph node names
336///
337/// + | I
338/// / \ |
339/// + + | A, B
340/// / \ / \ |
341/// * + * | C, D, E
342/// / \ / \ / \ |
343/// + * | F, G
344///
345/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
346/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
347///
348/// The expression is maximal: if some instruction is a binary operator of the
349/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
350/// then the instruction also belongs to the expression, is not a leaf node of
351/// it, and its operands also belong to the expression (but may be leaf nodes).
352///
353/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
354/// order to ensure that every non-root node in the expression has *exactly one*
355/// use by a non-leaf node of the expression. This destruction means that the
356/// caller MUST either replace 'I' with a new expression or use something like
357/// RewriteExprTree to put the values back in if the routine indicates that it
358/// made a change by returning 'true'.
359///
360/// In the above example either the right operand of A or the left operand of B
361/// will be replaced by undef. If it is B's operand then this gives:
362///
363/// + | I
364/// / \ |
365/// + + | A, B - operand of B replaced with undef
366/// / \ \ |
367/// * + * | C, D, E
368/// / \ / \ / \ |
369/// + * | F, G
370///
371/// Note that such undef operands can only be reached by passing through 'I'.
372/// For example, if you visit operands recursively starting from a leaf node
373/// then you will never see such an undef operand unless you get back to 'I',
374/// which requires passing through a phi node.
375///
376/// Note that this routine may also mutate binary operators of the wrong type
377/// that have all uses inside the expression (i.e. only used by non-leaf nodes
378/// of the expression) if it can turn them into binary operators of the right
379/// type and thus make the expression bigger.
384 assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
385 "Expected a UnaryOperator or BinaryOperator!");
386 LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
387 unsigned Opcode = I->getOpcode();
388 assert(I->isAssociative() && I->isCommutative() &&
389 "Expected an associative and commutative operation!");
390
391 // Visit all operands of the expression, keeping track of their weight (the
392 // number of paths from the expression root to the operand, or if you like
393 // the number of times that operand occurs in the linearized expression).
394 // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
395 // while A has weight two.
396
397 // Worklist of non-leaf nodes (their operands are in the expression too) along
398 // with their weights, representing a certain number of paths to the operator.
399 // If an operator occurs in the worklist multiple times then we found multiple
400 // ways to get to it.
401 SmallVector<std::pair<Instruction *, uint64_t>, 8> Worklist; // (Op, Weight)
402 Worklist.push_back(std::make_pair(I, 1));
403 bool Changed = false;
404
405 // Leaves of the expression are values that either aren't the right kind of
406 // operation (eg: a constant, or a multiply in an add tree), or are, but have
407 // some uses that are not inside the expression. For example, in I = X + X,
408 // X = A + B, the value X has two uses (by I) that are in the expression. If
409 // X has any other uses, for example in a return instruction, then we consider
410 // X to be a leaf, and won't analyze it further. When we first visit a value,
411 // if it has more than one use then at first we conservatively consider it to
412 // be a leaf. Later, as the expression is explored, we may discover some more
413 // uses of the value from inside the expression. If all uses turn out to be
414 // from within the expression (and the value is a binary operator of the right
415 // kind) then the value is no longer considered to be a leaf, and its operands
416 // are explored.
417
418 // Leaves - Keeps track of the set of putative leaves as well as the number of
419 // paths to each leaf seen so far.
420 using LeafMap = DenseMap<Value *, uint64_t>;
421 LeafMap Leaves; // Leaf -> Total weight so far.
422 SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
423 const DataLayout DL = I->getDataLayout();
424
425#ifndef NDEBUG
426 SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
427#endif
428 while (!Worklist.empty()) {
429 // We examine the operands of this binary operator.
430 auto [I, Weight] = Worklist.pop_back_val();
431
432 if (isa<OverflowingBinaryOperator>(I)) {
433 Flags.HasNUW &= I->hasNoUnsignedWrap();
434 Flags.HasNSW &= I->hasNoSignedWrap();
435 }
436
437 for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
438 Value *Op = I->getOperand(OpIdx);
439 LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
440 assert(!Op->use_empty() && "No uses, so how did we get to it?!");
441
442 // If this is a binary operation of the right kind with only one use then
443 // add its operands to the expression.
444 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
445 assert(Visited.insert(Op).second && "Not first visit!");
446 LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
447 Worklist.push_back(std::make_pair(BO, Weight));
448 continue;
449 }
450
451 // Appears to be a leaf. Is the operand already in the set of leaves?
452 LeafMap::iterator It = Leaves.find(Op);
453 if (It == Leaves.end()) {
454 // Not in the leaf map. Must be the first time we saw this operand.
455 assert(Visited.insert(Op).second && "Not first visit!");
456 if (!Op->hasOneUse()) {
457 // This value has uses not accounted for by the expression, so it is
458 // not safe to modify. Mark it as being a leaf.
460 << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
461 LeafOrder.push_back(Op);
462 Leaves[Op] = Weight;
463 continue;
464 }
465 // No uses outside the expression, try morphing it.
466 } else {
467 // Already in the leaf map.
468 assert(It != Leaves.end() && Visited.count(Op) &&
469 "In leaf map but not visited!");
470
471 // Update the number of paths to the leaf.
472 It->second += Weight;
473 assert(It->second >= Weight && "Weight overflows");
474
475 // If we still have uses that are not accounted for by the expression
476 // then it is not safe to modify the value.
477 if (!Op->hasOneUse())
478 continue;
479
480 // No uses outside the expression, try morphing it.
481 Weight = It->second;
482 Leaves.erase(It); // Since the value may be morphed below.
483 }
484
485 // At this point we have a value which, first of all, is not a binary
486 // expression of the right kind, and secondly, is only used inside the
487 // expression. This means that it can safely be modified. See if we
488 // can usefully morph it into an expression of the right kind.
489 assert((!isa<Instruction>(Op) ||
490 cast<Instruction>(Op)->getOpcode() != Opcode
491 || (isa<FPMathOperator>(Op) &&
492 !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
493 "Should have been handled above!");
494 assert(Op->hasOneUse() && "Has uses outside the expression tree!");
495
496 // If this is a multiply expression, turn any internal negations into
497 // multiplies by -1 so they can be reassociated. Add any users of the
498 // newly created multiplication by -1 to the redo list, so any
499 // reassociation opportunities that are exposed will be reassociated
500 // further.
501 Instruction *Neg;
502 if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
503 (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
504 match(Op, m_Instruction(Neg))) {
506 << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
508 LLVM_DEBUG(dbgs() << *Mul << '\n');
509 Worklist.push_back(std::make_pair(Mul, Weight));
510 for (User *U : Mul->users()) {
511 if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
512 ToRedo.insert(UserBO);
513 }
514 ToRedo.insert(Neg);
515 Changed = true;
516 continue;
517 }
518
519 // Failed to morph into an expression of the right type. This really is
520 // a leaf.
521 LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
522 assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
523 LeafOrder.push_back(Op);
524 Leaves[Op] = Weight;
525 }
526 }
527
528 // The leaves, repeated according to their weights, represent the linearized
529 // form of the expression.
530 for (Value *V : LeafOrder) {
531 LeafMap::iterator It = Leaves.find(V);
532 if (It == Leaves.end())
533 // Node initially thought to be a leaf wasn't.
534 continue;
535 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
536 uint64_t Weight = It->second;
537 // Ensure the leaf is only output once.
538 It->second = 0;
539 Ops.push_back(std::make_pair(V, Weight));
540 if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW)
541 Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
542 }
543
544 // For nilpotent operations or addition there may be no operands, for example
545 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
546 // in both cases the weight reduces to 0 causing the value to be skipped.
547 if (Ops.empty()) {
548 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
549 assert(Identity && "Associative operation without identity!");
550 Ops.emplace_back(Identity, 1);
551 }
552
553 return Changed;
554}
555
556/// Now that the operands for this expression tree are
557/// linearized and optimized, emit them in-order.
558void ReassociatePass::RewriteExprTree(BinaryOperator *I,
560 OverflowTracking Flags) {
561 assert(Ops.size() > 1 && "Single values should be used directly!");
562
563 // Since our optimizations should never increase the number of operations, the
564 // new expression can usually be written reusing the existing binary operators
565 // from the original expression tree, without creating any new instructions,
566 // though the rewritten expression may have a completely different topology.
567 // We take care to not change anything if the new expression will be the same
568 // as the original. If more than trivial changes (like commuting operands)
569 // were made then we are obliged to clear out any optional subclass data like
570 // nsw flags.
571
572 /// NodesToRewrite - Nodes from the original expression available for writing
573 /// the new expression into.
574 SmallVector<BinaryOperator*, 8> NodesToRewrite;
575 unsigned Opcode = I->getOpcode();
577
578 /// NotRewritable - The operands being written will be the leaves of the new
579 /// expression and must not be used as inner nodes (via NodesToRewrite) by
580 /// mistake. Inner nodes are always reassociable, and usually leaves are not
581 /// (if they were they would have been incorporated into the expression and so
582 /// would not be leaves), so most of the time there is no danger of this. But
583 /// in rare cases a leaf may become reassociable if an optimization kills uses
584 /// of it, or it may momentarily become reassociable during rewriting (below)
585 /// due it being removed as an operand of one of its uses. Ensure that misuse
586 /// of leaf nodes as inner nodes cannot occur by remembering all of the future
587 /// leaves and refusing to reuse any of them as inner nodes.
588 SmallPtrSet<Value*, 8> NotRewritable;
589 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
590 NotRewritable.insert(Ops[i].Op);
591
592 // ExpressionChangedStart - Non-null if the rewritten expression differs from
593 // the original in some non-trivial way, requiring the clearing of optional
594 // flags. Flags are cleared from the operator in ExpressionChangedStart up to
595 // ExpressionChangedEnd inclusive.
596 BinaryOperator *ExpressionChangedStart = nullptr,
597 *ExpressionChangedEnd = nullptr;
598 for (unsigned i = 0; ; ++i) {
599 // The last operation (which comes earliest in the IR) is special as both
600 // operands will come from Ops, rather than just one with the other being
601 // a subexpression.
602 if (i+2 == Ops.size()) {
603 Value *NewLHS = Ops[i].Op;
604 Value *NewRHS = Ops[i+1].Op;
605 Value *OldLHS = Op->getOperand(0);
606 Value *OldRHS = Op->getOperand(1);
607
608 if (NewLHS == OldLHS && NewRHS == OldRHS)
609 // Nothing changed, leave it alone.
610 break;
611
612 if (NewLHS == OldRHS && NewRHS == OldLHS) {
613 // The order of the operands was reversed. Swap them.
614 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
615 Op->swapOperands();
616 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
617 MadeChange = true;
618 ++NumChanged;
619 break;
620 }
621
622 // The new operation differs non-trivially from the original. Overwrite
623 // the old operands with the new ones.
624 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
625 if (NewLHS != OldLHS) {
626 BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
627 if (BO && !NotRewritable.count(BO))
628 NodesToRewrite.push_back(BO);
629 Op->setOperand(0, NewLHS);
630 }
631 if (NewRHS != OldRHS) {
632 BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
633 if (BO && !NotRewritable.count(BO))
634 NodesToRewrite.push_back(BO);
635 Op->setOperand(1, NewRHS);
636 }
637 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
638
639 ExpressionChangedStart = Op;
640 if (!ExpressionChangedEnd)
641 ExpressionChangedEnd = Op;
642 MadeChange = true;
643 ++NumChanged;
644
645 break;
646 }
647
648 // Not the last operation. The left-hand side will be a sub-expression
649 // while the right-hand side will be the current element of Ops.
650 Value *NewRHS = Ops[i].Op;
651 if (NewRHS != Op->getOperand(1)) {
652 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
653 if (NewRHS == Op->getOperand(0)) {
654 // The new right-hand side was already present as the left operand. If
655 // we are lucky then swapping the operands will sort out both of them.
656 Op->swapOperands();
657 } else {
658 // Overwrite with the new right-hand side.
659 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
660 if (BO && !NotRewritable.count(BO))
661 NodesToRewrite.push_back(BO);
662 Op->setOperand(1, NewRHS);
663 ExpressionChangedStart = Op;
664 if (!ExpressionChangedEnd)
665 ExpressionChangedEnd = Op;
666 }
667 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
668 MadeChange = true;
669 ++NumChanged;
670 }
671
672 // Now deal with the left-hand side. If this is already an operation node
673 // from the original expression then just rewrite the rest of the expression
674 // into it.
675 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
676 if (BO && !NotRewritable.count(BO)) {
677 Op = BO;
678 continue;
679 }
680
681 // Otherwise, grab a spare node from the original expression and use that as
682 // the left-hand side. If there are no nodes left then the optimizers made
683 // an expression with more nodes than the original! This usually means that
684 // they did something stupid but it might mean that the problem was just too
685 // hard (finding the mimimal number of multiplications needed to realize a
686 // multiplication expression is NP-complete). Whatever the reason, smart or
687 // stupid, create a new node if there are none left.
688 BinaryOperator *NewOp;
689 if (NodesToRewrite.empty()) {
690 Constant *Poison = PoisonValue::get(I->getType());
691 NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), Poison,
692 Poison, "", I->getIterator());
693 if (isa<FPMathOperator>(NewOp))
694 NewOp->setFastMathFlags(I->getFastMathFlags());
695 } else {
696 NewOp = NodesToRewrite.pop_back_val();
697 }
698
699 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
700 Op->setOperand(0, NewOp);
701 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
702 ExpressionChangedStart = Op;
703 if (!ExpressionChangedEnd)
704 ExpressionChangedEnd = Op;
705 MadeChange = true;
706 ++NumChanged;
707 Op = NewOp;
708 }
709
710 // If the expression changed non-trivially then clear out all subclass data
711 // starting from the operator specified in ExpressionChanged, and compactify
712 // the operators to just before the expression root to guarantee that the
713 // expression tree is dominated by all of Ops.
714 if (ExpressionChangedStart) {
715 bool ClearFlags = true;
716 do {
717 // Preserve flags.
718 if (ClearFlags) {
719 if (isa<FPMathOperator>(I)) {
720 FastMathFlags Flags = I->getFastMathFlags();
721 ExpressionChangedStart->clearSubclassOptionalData();
722 ExpressionChangedStart->setFastMathFlags(Flags);
723 } else {
724 ExpressionChangedStart->clearSubclassOptionalData();
725 // Note that it doesn't hold for mul if one of the operands is zero.
726 // TODO: We can preserve NUW flag if we prove that all mul operands
727 // are non-zero.
728 if (ExpressionChangedStart->getOpcode() == Instruction::Add) {
729 if (Flags.HasNUW)
730 ExpressionChangedStart->setHasNoUnsignedWrap();
731 if (Flags.HasNSW && (Flags.AllKnownNonNegative || Flags.HasNUW))
732 ExpressionChangedStart->setHasNoSignedWrap();
733 }
734 }
735 }
736
737 if (ExpressionChangedStart == ExpressionChangedEnd)
738 ClearFlags = false;
739 if (ExpressionChangedStart == I)
740 break;
741
742 // Discard any debug info related to the expressions that has changed (we
743 // can leave debug info related to the root and any operation that didn't
744 // change, since the result of the expression tree should be the same
745 // even after reassociation).
746 if (ClearFlags)
747 replaceDbgUsesWithUndef(ExpressionChangedStart);
748
749 ExpressionChangedStart->moveBefore(I);
750 ExpressionChangedStart =
751 cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
752 } while (true);
753 }
754
755 // Throw away any left over nodes from the original expression.
756 for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
757 RedoInsts.insert(NodesToRewrite[i]);
758}
759
760/// Insert instructions before the instruction pointed to by BI,
761/// that computes the negative version of the value specified. The negative
762/// version of the value is returned, and BI is left pointing at the instruction
763/// that should be processed next by the reassociation pass.
764/// Also add intermediate instructions to the redo list that are modified while
765/// pushing the negates through adds. These will be revisited to see if
766/// additional opportunities have been exposed.
769 if (auto *C = dyn_cast<Constant>(V)) {
770 const DataLayout &DL = BI->getDataLayout();
771 Constant *Res = C->getType()->isFPOrFPVectorTy()
772 ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
774 if (Res)
775 return Res;
776 }
777
778 // We are trying to expose opportunity for reassociation. One of the things
779 // that we want to do to achieve this is to push a negation as deep into an
780 // expression chain as possible, to expose the add instructions. In practice,
781 // this means that we turn this:
782 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
783 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
784 // the constants. We assume that instcombine will clean up the mess later if
785 // we introduce tons of unnecessary negation instructions.
786 //
787 if (BinaryOperator *I =
788 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
789 // Push the negates through the add.
790 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
791 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
792 if (I->getOpcode() == Instruction::Add) {
793 I->setHasNoUnsignedWrap(false);
794 I->setHasNoSignedWrap(false);
795 }
796
797 // We must move the add instruction here, because the neg instructions do
798 // not dominate the old add instruction in general. By moving it, we are
799 // assured that the neg instructions we just inserted dominate the
800 // instruction we are about to insert after them.
801 //
802 I->moveBefore(BI);
803 I->setName(I->getName()+".neg");
804
805 // Add the intermediate negates to the redo list as processing them later
806 // could expose more reassociating opportunities.
807 ToRedo.insert(I);
808 return I;
809 }
810
811 // Okay, we need to materialize a negated version of V with an instruction.
812 // Scan the use lists of V to see if we have one already.
813 for (User *U : V->users()) {
814 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
815 continue;
816
817 // We found one! Now we have to make sure that the definition dominates
818 // this use. We do this by moving it to the entry block (if it is a
819 // non-instruction value) or right after the definition. These negates will
820 // be zapped by reassociate later, so we don't need much finesse here.
821 Instruction *TheNeg = dyn_cast<Instruction>(U);
822
823 // We can't safely propagate a vector zero constant with poison/undef lanes.
824 Constant *C;
825 if (match(TheNeg, m_BinOp(m_Constant(C), m_Value())) &&
826 C->containsUndefOrPoisonElement())
827 continue;
828
829 // Verify that the negate is in this function, V might be a constant expr.
830 if (!TheNeg ||
831 TheNeg->getParent()->getParent() != BI->getParent()->getParent())
832 continue;
833
834 BasicBlock::iterator InsertPt;
835 if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
836 auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
837 if (!InsertPtOpt)
838 continue;
839 InsertPt = *InsertPtOpt;
840 } else {
841 InsertPt = TheNeg->getFunction()
842 ->getEntryBlock()
844 ->getIterator();
845 }
846
847 // Check that if TheNeg is moved out of its parent block, we drop its
848 // debug location to avoid extra coverage.
849 // See test dropping_debugloc_the_neg.ll for a detailed example.
850 if (TheNeg->getParent() != InsertPt->getParent())
851 TheNeg->dropLocation();
852 TheNeg->moveBefore(*InsertPt->getParent(), InsertPt);
853
854 if (TheNeg->getOpcode() == Instruction::Sub) {
855 TheNeg->setHasNoUnsignedWrap(false);
856 TheNeg->setHasNoSignedWrap(false);
857 } else {
858 TheNeg->andIRFlags(BI);
859 }
860 ToRedo.insert(TheNeg);
861 return TheNeg;
862 }
863
864 // Insert a 'neg' instruction that subtracts the value from zero to get the
865 // negation.
866 Instruction *NewNeg =
867 CreateNeg(V, V->getName() + ".neg", BI->getIterator(), BI);
868 ToRedo.insert(NewNeg);
869 return NewNeg;
870}
871
872// See if this `or` looks like an load widening reduction, i.e. that it
873// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
874// ensure that the pattern is *really* a load widening reduction,
875// we do not ensure that it can really be replaced with a widened load,
876// only that it mostly looks like one.
// NOTE(review): original lines 877-879 (the function signature plus the
// Worklist/Visited local declarations) were lost in extraction — presumably
// `static bool isLoadCombineCandidate(Instruction *Or) {` with a small
// worklist vector and a visited set; confirm against upstream Reassociate.cpp.
880
  // Enqueue pushes a node for later inspection. Non-instruction operands
  // disqualify the pattern immediately; the visited set ensures each shared
  // subtree is walked only once.
881  auto Enqueue = [&](Value *V) {
882    auto *I = dyn_cast<Instruction>(V);
883    // Each node of an `or` reduction must be an instruction,
884    if (!I)
885      return false; // Node is certainly not part of an `or` load reduction.
886    // Only process instructions we have never processed before.
887    if (Visited.insert(I).second)
888      Worklist.emplace_back(I);
889    return true; // Will need to look at parent nodes.
890  };
891
892  if (!Enqueue(Or))
893    return false; // Not an `or` reduction pattern.
894
  // Iterative DFS over the expression graph: every reachable node must be an
  // or/shl/zext (recurse further) or a load (leaf). Anything else means the
  // whole tree is rejected.
895  while (!Worklist.empty()) {
896    auto *I = Worklist.pop_back_val();
897
898    // Okay, which instruction is this node?
899    switch (I->getOpcode()) {
900    case Instruction::Or:
901      // Got an `or` node. That's fine, just recurse into it's operands.
902      for (Value *Op : I->operands())
903        if (!Enqueue(Op))
904          return false; // Not an `or` reduction pattern.
905      continue;
906
907    case Instruction::Shl:
908    case Instruction::ZExt:
909      // `shl`/`zext` nodes are fine, just recurse into their base operand.
910      if (!Enqueue(I->getOperand(0)))
911        return false; // Not an `or` reduction pattern.
912      continue;
913
914    case Instruction::Load:
915      // Perfect, `load` node means we've reached an edge of the graph.
916      continue;
917
918    default: // Unknown node.
919      return false; // Not an `or` reduction pattern.
920    }
921  }
922
923  return true;
924}
925
926/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
// NOTE(review): original line 927 (the function signature, presumably
// `static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {`) was
// lost in extraction — confirm against upstream.
928  // Don't bother to convert this up unless either the LHS is an associable add
929  // or subtract or mul or if this is only used by one of the above.
930  // This is only a compile-time improvement, it is not needed for correctness!
  // "Interesting" = a value that is itself a reassociable add/sub/mul/shl, so
  // converting the `or` to `add` could expose more reassociation.
931  auto isInteresting = [](Value *V) {
932    for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
933                    Instruction::Shl})
934      if (isReassociableOp(V, Op))
935        return true;
936    return false;
937  };
938
939  if (any_of(Or->operands(), isInteresting))
940    return true;
941
  // NOTE(review): user_back() is read before the hasOneUse() check below; this
  // assumes Or always has at least one user here — confirm with callers.
942  Value *VB = Or->user_back();
943  if (Or->hasOneUse() && isInteresting(VB))
944    return true;
945
946  return false;
947}
948
949/// If we have (X|Y), and iff X and Y have no common bits set,
950/// transform this into (X+Y) to allow arithmetics reassociation.
// NOTE(review): original line 951 (the function signature, presumably
// `static BinaryOperator *convertOrWithNoCommonBitsToAdd(Instruction *Or) {`)
// was lost in extraction.
952  // Convert an or into an add.
953  BinaryOperator *New = CreateAdd(Or->getOperand(0), Or->getOperand(1), "",
954                                  Or->getIterator(), Or);
  // Per the contract above, X and Y share no set bits, so the addition can
  // produce no carries and both wrap flags can be set.
955  New->setHasNoSignedWrap();
956  New->setHasNoUnsignedWrap();
957  New->takeName(Or);
958
959  // Everyone now refers to the add instruction.
960  Or->replaceAllUsesWith(New);
961  New->setDebugLoc(Or->getDebugLoc());
962
963  LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
964  return New;
965}
966
967/// Return true if we should break up this subtract of X-Y into (X + -Y).
// NOTE(review): original line 968 (the function signature, presumably
// `static bool ShouldBreakUpSubtract(Instruction *Sub) {`) was lost in
// extraction.
969  // If this is a negation, we can't split it up!
970  if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
971    return false;
972
973  // Don't breakup X - undef.
974  if (isa<UndefValue>(Sub->getOperand(1)))
975    return false;
976
977  // Don't bother to break this up unless either the LHS is an associable add or
978  // subtract or if this is only used by one.
979  Value *V0 = Sub->getOperand(0);
980  if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
981      isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
982    return true;
983  Value *V1 = Sub->getOperand(1);
984  if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
985      isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
986    return true;
  // NOTE(review): user_back() is read before hasOneUse() is checked; this
  // assumes Sub has at least one user at this point — confirm with callers.
987  Value *VB = Sub->user_back();
988  if (Sub->hasOneUse() &&
989      (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
990       isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
991    return true;
992
993  return false;
994}
995
996/// If we have (X-Y), and if either X is an add, or if this is only used by an
997/// add, transform this into (X+(0-Y)) to promote better reassociation.
// NOTE(review): original lines 998-999 (the two-line function signature,
// presumably `static BinaryOperator *BreakUpSubtract(Instruction *Sub,
// ReassociatePass::OrderedSet &ToRedo) {`) were lost in extraction.
1000  // Convert a subtract into an add and a neg instruction. This allows sub
1001  // instructions to be commuted with other add instructions.
1002  //
1003  // Calculate the negative value of Operand 1 of the sub instruction,
1004  // and set it as the RHS of the add instruction we just made.
1005  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1006  BinaryOperator *New =
1007      CreateAdd(Sub->getOperand(0), NegVal, "", Sub->getIterator(), Sub);
  // Null out the old sub's operands so it no longer keeps its inputs alive;
  // it becomes trivially dead once all users are moved to New.
1008  Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1009  Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1010  New->takeName(Sub);
1011
1012  // Everyone now refers to the add instruction.
1013  Sub->replaceAllUsesWith(New);
1014  New->setDebugLoc(Sub->getDebugLoc());
1015
1016  LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1017  return New;
1018}
1019
1020/// If this is a shift of a reassociable multiply or is used by one, change
1021/// this into a multiply by a constant to assist with further reassociation.
// NOTE(review): original line 1022 (the function signature, presumably
// `static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {`) was lost in
// extraction.
  // Build the multiplier constant (1 << SA) by constant-folding the shift of
  // the all-but-lowest-bit-clear constant 1 by the shift amount.
1023  Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1024  auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1025  MulCst = ConstantFoldBinaryInstruction(Instruction::Shl, MulCst, SA);
1026  assert(MulCst && "Constant folding of immediate constants failed");
1027
1028  BinaryOperator *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
1029                                                  "", Shl->getIterator());
1030  Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
1031  Mul->takeName(Shl);
1032
1033  // Everyone now refers to the mul instruction.
1034  Shl->replaceAllUsesWith(Mul);
1035  Mul->setDebugLoc(Shl->getDebugLoc());
1036
1037  // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1038  // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1039  // handling. It can be preserved as long as we're not left shifting by
1040  // bitwidth - 1.
1041  bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1042  bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1043  unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
1044  if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1045    Mul->setHasNoSignedWrap(true);
1046  Mul->setHasNoUnsignedWrap(NUW);
1047  return Mul;
1048}
1049
1050/// Scan backwards and forwards among values with the same rank as element i
1051/// to see if X exists. If X does not exist, return i. This is useful when
1052/// scanning for 'x' when we see '-x' because they both get the same rank.
// NOTE(review): original line 1053 (the first signature line, presumably
// `static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops,`) was
// lost in extraction.
1054                                  unsigned i, Value *X) {
1055  unsigned XRank = Ops[i].Rank;
1056  unsigned e = Ops.size();
  // Ops is sorted by rank, so all candidates with the same rank are
  // contiguous around i. isIdenticalTo() also catches distinct-but-equal
  // instructions, not just pointer-identical values.
1057  for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1058    if (Ops[j].Op == X)
1059      return j;
1060    if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1061      if (Instruction *I2 = dyn_cast<Instruction>(X))
1062        if (I1->isIdenticalTo(I2))
1063          return j;
1064  }
1065  // Scan backwards.
  // j is unsigned: decrementing past 0 wraps to ~0U, terminating the loop.
1066  for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1067    if (Ops[j].Op == X)
1068      return j;
1069    if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1070      if (Instruction *I2 = dyn_cast<Instruction>(X))
1071        if (I1->isIdenticalTo(I2))
1072          return j;
1073  }
1074  return i;
1075}
1076
1077/// Emit a tree of add instructions, summing Ops together
1078/// and returning the result. Insert the tree before I.
// NOTE(review): original lines 1079-1080 (the function signature, presumably
// `static Value *EmitAddTreeOfValues(BasicBlock::iterator It,
// SmallVectorImpl<WeakTrackingVH> &Ops) {`) were lost in extraction.
1081  if (Ops.size() == 1) return Ops.back();
1082
  // Recursively sum the remaining operands first, then add the popped value
  // on top, building a left-leaning chain of adds at the insertion point.
1083  Value *V1 = Ops.pop_back_val();
1084  Value *V2 = EmitAddTreeOfValues(It, Ops);
1085  return CreateAdd(V2, V1, "reass.add", It, &*It);
1086}
1087
1088/// If V is an expression tree that is a multiplication sequence,
1089/// and if this sequence contains a multiply by Factor,
1090/// remove Factor from the tree and return the new tree.
1091Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
1092  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1093  if (!BO)
1094    return nullptr;
1095
// NOTE(review): original lines 1096-1097 and 1099 (declarations of the
// linearized `Tree`, the overflow-tracking `Flags`, and the `Factors` vector)
// were lost in extraction — confirm against upstream Reassociate.cpp.
1098  MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, Flags);
1100  Factors.reserve(Tree.size());
  // Expand the (value, multiplicity) pairs of the linearized tree into a flat
  // list of ranked factors.
1101  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
1102    RepeatedValue E = Tree[i];
1103    Factors.append(E.second, ValueEntry(getRank(E.first), E.first));
1104  }
1105
  // Search for Factor itself, or for a constant that is its exact negation
  // (in which case the result must be negated afterwards).
1106  bool FoundFactor = false;
1107  bool NeedsNegate = false;
1108  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1109    if (Factors[i].Op == Factor) {
1110      FoundFactor = true;
1111      Factors.erase(Factors.begin()+i);
1112      break;
1113    }
1114
1115    // If this is a negative version of this factor, remove it.
1116    if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1117      if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1118        if (FC1->getValue() == -FC2->getValue()) {
1119          FoundFactor = NeedsNegate = true;
1120          Factors.erase(Factors.begin()+i);
1121          break;
1122        }
1123    } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1124      if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
1125        const APFloat &F1 = FC1->getValueAPF();
1126        APFloat F2(FC2->getValueAPF());
1127        F2.changeSign();
1128        if (F1 == F2) {
1129          FoundFactor = NeedsNegate = true;
1130          Factors.erase(Factors.begin() + i);
1131          break;
1132        }
1133      }
1134    }
1135  }
1136
1137  if (!FoundFactor) {
1138    // Make sure to restore the operands to the expression tree.
1139    RewriteExprTree(BO, Factors, Flags);
1140    return nullptr;
1141  }
1142
1143  BasicBlock::iterator InsertPt = ++BO->getIterator();
1144
1145  // If this was just a single multiply, remove the multiply and return the only
1146  // remaining operand.
1147  if (Factors.size() == 1) {
1148    RedoInsts.insert(BO);
1149    V = Factors[0].Op;
1150  } else {
1151    RewriteExprTree(BO, Factors, Flags);
1152    V = BO;
1153  }
1154
1155  if (NeedsNegate)
1156    V = CreateNeg(V, "neg", InsertPt, BO);
1157
1158  return V;
1159}
1160
1161/// If V is a single-use multiply, recursively add its operands as factors,
1162/// otherwise add V to the list of factors.
1163///
1164/// Ops is the top-level list of add operands we're trying to factor.
// NOTE(review): original line 1165 (the first signature line, presumably
// `static void FindSingleUseMultiplyFactors(Value *V,`) was lost in
// extraction.
1166                                 SmallVectorImpl<Value*> &Factors) {
1167  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1168  if (!BO) {
    // Not a reassociable multiply: V itself is a leaf factor.
1169    Factors.push_back(V);
1170    return;
1171  }
1172
1173  // Otherwise, add the LHS and RHS to the list of factors.
// NOTE(review): original lines 1174-1175 (presumably the two recursive calls
// on BO->getOperand(0) and BO->getOperand(1)) were lost in extraction.
1176}
1177
1178/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1179/// This optimizes based on identities. If it can be reduced to a single Value,
1180/// it is returned, otherwise the Ops list is mutated as necessary.
1181static Value *OptimizeAndOrXor(unsigned Opcode,
// NOTE(review): original line 1182 (the signature continuation, presumably
// `SmallVectorImpl<ValueEntry> &Ops) {`) was lost in extraction.
1183  // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1184  // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1185  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1186    // First, check for X and ~X in the operand list.
1187    assert(i < Ops.size());
1188    Value *X;
1189    if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1190      unsigned FoundX = FindInOperandList(Ops, i, X);
1191      if (FoundX != i) {
1192        if (Opcode == Instruction::And) // ...&X&~X = 0
1193          return Constant::getNullValue(X->getType());
1194
1195        if (Opcode == Instruction::Or) // ...|X|~X = -1
1196          return Constant::getAllOnesValue(X->getType());
1197      }
1198    }
1199
1200    // Next, check for duplicate pairs of values, which we assume are next to
1201    // each other, due to our sorting criteria.
1202    assert(i < Ops.size());
1203    if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1204      if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1205        // Drop duplicate values for And and Or.
        // X&X == X and X|X == X: drop one copy and re-examine position i.
1206        Ops.erase(Ops.begin()+i);
1207        --i; --e;
1208        ++NumAnnihil;
1209        continue;
1210      }
1211
1212      // Drop pairs of values for Xor.
1213      assert(Opcode == Instruction::Xor);
1214      if (e == 2)
1215        return Constant::getNullValue(Ops[0].Op->getType());
1216
1217      // Y ^ X^X -> Y
      // Erase both copies; back i up by one so that after the loop's ++i the
      // next unexamined element lands at the same index.
1218      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1219      i -= 1; e -= 2;
1220      ++NumAnnihil;
1221    }
1222  }
1223  return nullptr;
1224}
1225
1226/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1227/// instruction with the given two operands, and return the resulting
1228/// instruction. There are two special cases: 1) if the constant operand is 0,
1229/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1230/// be returned.
// NOTE(review): original line 1231 (the first signature line, presumably
// `static Value *createAndInstr(BasicBlock::iterator InsertBefore,
// Value *Opnd,`) was lost in extraction.
1232                              const APInt &ConstOpnd) {
  // x & 0 == 0 — signalled by returning null rather than materializing it.
1233  if (ConstOpnd.isZero())
1234    return nullptr;
1235
  // x & ~0 == x — no instruction needed.
1236  if (ConstOpnd.isAllOnes())
1237    return Opnd;
1238
1239  Instruction *I = BinaryOperator::CreateAnd(
1240      Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1241      InsertBefore);
1242  I->setDebugLoc(InsertBefore->getDebugLoc());
1243  return I;
1244}
1245
1246// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1247// into "R ^ C", where C would be 0, and R is a symbolic value.
1248//
1249// If it was successful, true is returned, and the "R" and "C" is returned
1250// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1251// and both "Res" and "ConstOpnd" remain unchanged.
1252bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1253 APInt &ConstOpnd, Value *&Res) {
1254 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1255 // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1256 // = (x & ~c1) ^ (c1 ^ c2)
1257 // It is useful only when c1 == c2.
1258 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1259 return false;
1260
1261 if (!Opnd1->getValue()->hasOneUse())
1262 return false;
1263
1264 const APInt &C1 = Opnd1->getConstPart();
1265 if (C1 != ConstOpnd)
1266 return false;
1267
1268 Value *X = Opnd1->getSymbolicPart();
1269 Res = createAndInstr(It, X, ~C1);
1270 // ConstOpnd was C2, now C1 ^ C2.
1271 ConstOpnd ^= C1;
1272
1273 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1274 RedoInsts.insert(T);
1275 return true;
1276}
1277
1278// Helper function of OptimizeXor(). It tries to simplify
1279// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1280// symbolic value.
1281//
1282// If it was successful, true is returned, and the "R" and "C" is returned
1283// via "Res" and "ConstOpnd", respectively (If the entire expression is
1284// evaluated to a constant, the Res is set to NULL); otherwise, false is
1285// returned, and both "Res" and "ConstOpnd" remain unchanged.
1286bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1287                                     XorOpnd *Opnd2, APInt &ConstOpnd,
1288                                     Value *&Res) {
  // Both operands must share the same symbolic value "x" for any of the
  // rules below to apply.
1289  Value *X = Opnd1->getSymbolicPart();
1290  if (X != Opnd2->getSymbolicPart())
1291    return false;

1292
1293  // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
  // DeadInstNum is a code-size budget: the rewrite may create up to two new
  // instructions, so it is only applied when at least that many die.
1294  int DeadInstNum = 1;
1295  if (Opnd1->getValue()->hasOneUse())
1296    DeadInstNum++;
1297  if (Opnd2->getValue()->hasOneUse())
1298    DeadInstNum++;
1299
1300  // Xor-Rule 2:
1301  //  (x | c1) ^ (x & c2)
1302  //   = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1303  //   = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1304  //   = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1305  //
1306  if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
    // Canonicalize so that Opnd1 is the or-expression.
1307    if (Opnd2->isOrExpr())
1308      std::swap(Opnd1, Opnd2);
1309
1310    const APInt &C1 = Opnd1->getConstPart();
1311    const APInt &C2 = Opnd2->getConstPart();
1312    APInt C3((~C1) ^ C2);
1313
1314    // Do not increase code size!
    // One new "and" is always needed; a second instruction (xor with the
    // constant) only if ConstOpnd was previously zero.
1315    if (!C3.isZero() && !C3.isAllOnes()) {
1316      int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1317      if (NewInstNum > DeadInstNum)
1318        return false;
1319    }
1320
1321    Res = createAndInstr(It, X, C3);
1322    ConstOpnd ^= C1;
1323  } else if (Opnd1->isOrExpr()) {
1324    // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1325    //
1326    const APInt &C1 = Opnd1->getConstPart();
1327    const APInt &C2 = Opnd2->getConstPart();
1328    APInt C3 = C1 ^ C2;
1329
1330    // Do not increase code size
1331    if (!C3.isZero() && !C3.isAllOnes()) {
1332      int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1333      if (NewInstNum > DeadInstNum)
1334        return false;
1335    }
1336
1337    Res = createAndInstr(It, X, C3);
1338    ConstOpnd ^= C3;
1339  } else {
1340    // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1341    //
    // Rule 4 never touches ConstOpnd and never grows code, so no budget
    // check is required here.
1342    const APInt &C1 = Opnd1->getConstPart();
1343    const APInt &C2 = Opnd2->getConstPart();
1344    APInt C3 = C1 ^ C2;
1345    Res = createAndInstr(It, X, C3);
1346  }
1347
1348  // Put the original operands in the Redo list; hope they will be deleted
1349  // as dead code.
1350  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1351    RedoInsts.insert(T);
1352  if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1353    RedoInsts.insert(T);
1354
1355  return true;
1356}
1357
1358/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1359/// to a single Value, it is returned, otherwise the Ops list is mutated as
1360/// necessary.
1361Value *ReassociatePass::OptimizeXor(Instruction *I,
// NOTE(review): original line 1362 (the signature continuation, presumably
// `SmallVectorImpl<ValueEntry> &Ops) {`) was lost in extraction.
  // First try the generic and/or/xor identities (X^X elimination etc.).
1363  if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1364    return V;
1365
1366  if (Ops.size() == 1)
1367    return nullptr;
1368
// NOTE(review): original line 1369 (presumably the declaration
// `SmallVector<XorOpnd, 8> Opnds;`) was lost in extraction.
1370  SmallVector<XorOpnd*, 8> OpndPtrs;
1371  Type *Ty = Ops[0].Op->getType();
  // ConstOpnd accumulates all constant operands folded into a single value.
1372  APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1373
1374  // Step 1: Convert ValueEntry to XorOpnd
1375  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1376    Value *V = Ops[i].Op;
1377    const APInt *C;
1378    // TODO: Support non-splat vectors.
1379    if (match(V, m_APInt(C))) {
1380      ConstOpnd ^= *C;
1381    } else {
1382      XorOpnd O(V);
1383      O.setSymbolicRank(getRank(O.getSymbolicPart()));
1384      Opnds.push_back(O);
1385    }
1386  }
1387
1388  // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1389  // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1390  // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1391  // with the previous loop --- the iterator of the "Opnds" may be invalidated
1392  // when new elements are added to the vector.
1393  for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
1394    OpndPtrs.push_back(&Opnds[i]);
1395
1396  // Step 2: Sort the Xor-Operands in a way such that the operands containing
1397  //  the same symbolic value cluster together. For instance, the input operand
1398  //  sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1399  //  ("x | 123", "x & 789", "y & 456").
1400  //
1401  //  The purpose is twofold:
1402  //  1) Cluster together the operands sharing the same symbolic-value.
1403  //  2) Operand having smaller symbolic-value-rank is permuted earlier, which
1404  //     could potentially shorten crital path, and expose more loop-invariants.
1405  //     Note that values' rank are basically defined in RPO order (FIXME).
1406  //     So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1407  //     than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1408  //     "z" in the order of X-Y-Z is better than any other orders.
1409  llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1410    return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1411  });
1412
1413  // Step 3: Combine adjacent operands
1414  XorOpnd *PrevOpnd = nullptr;
1415  bool Changed = false;
1416  for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1417    XorOpnd *CurrOpnd = OpndPtrs[i];
1418    // The combined value
1419    Value *CV;
1420
1421    // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1422    if (!ConstOpnd.isZero() &&
1423        CombineXorOpnd(I->getIterator(), CurrOpnd, ConstOpnd, CV)) {
1424      Changed = true;
1425      if (CV)
1426        *CurrOpnd = XorOpnd(CV);
1427      else {
        // The operand folded away entirely (result contributed nothing).
1428        CurrOpnd->Invalidate();
1429        continue;
1430      }
1431    }
1432
1433    if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1434      PrevOpnd = CurrOpnd;
1435      continue;
1436    }
1437
1438    // step 3.2: When previous and current operands share the same symbolic
1439    //  value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1440    if (CombineXorOpnd(I->getIterator(), CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1441      // Remove previous operand
1442      PrevOpnd->Invalidate();
1443      if (CV) {
1444        *CurrOpnd = XorOpnd(CV);
1445        PrevOpnd = CurrOpnd;
1446      } else {
1447        CurrOpnd->Invalidate();
1448        PrevOpnd = nullptr;
1449      }
1450      Changed = true;
1451    }
1452  }
1453
1454  // Step 4: Reassemble the Ops
1455  if (Changed) {
1456    Ops.clear();
1457    for (const XorOpnd &O : Opnds) {
1458      if (O.isInvalid())
1459        continue;
1460      ValueEntry VE(getRank(O.getValue()), O.getValue());
1461      Ops.push_back(VE);
1462    }
    // Fold the accumulated constant back in as an ordinary operand.
1463    if (!ConstOpnd.isZero()) {
1464      Value *C = ConstantInt::get(Ty, ConstOpnd);
1465      ValueEntry VE(getRank(C), C);
1466      Ops.push_back(VE);
1467    }
1468    unsigned Sz = Ops.size();
1469    if (Sz == 1)
1470      return Ops.back().Op;
1471    if (Sz == 0) {
      // Everything annihilated: the whole expression is the zero constant.
1472      assert(ConstOpnd.isZero());
1473      return ConstantInt::get(Ty, ConstOpnd);
1474    }
1475  }
1476
1477  return nullptr;
1478}
1479
1480/// Optimize a series of operands to an 'add' instruction. This
1481/// optimizes based on identities. If it can be reduced to a single Value, it
1482/// is returned, otherwise the Ops list is mutated as necessary.
1483Value *ReassociatePass::OptimizeAdd(Instruction *I,
// NOTE(review): original line 1484 (the signature continuation, presumably
// `SmallVectorImpl<ValueEntry> &Ops) {`) was lost in extraction.
1485  // Scan the operand lists looking for X and -X pairs. If we find any, we
1486  // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1487  // scan for any
1488  // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1489
1490  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1491    Value *TheOp = Ops[i].Op;
1492    // Check to see if we've seen this operand before. If so, we factor all
1493    // instances of the operand together. Due to our sorting criteria, we know
1494    // that these need to be next to each other in the vector.
1495    if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1496      // Rescan the list, remove all instances of this operand from the expr.
1497      unsigned NumFound = 0;
1498      do {
1499        Ops.erase(Ops.begin()+i);
1500        ++NumFound;
1501      } while (i != Ops.size() && Ops[i].Op == TheOp);
1502
1503      LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1504                        << '\n');
1505      ++NumFactor;
1506
1507      // Insert a new multiply.
1508      Type *Ty = TheOp->getType();
1509      Constant *C = Ty->isIntOrIntVectorTy() ?
1510        ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1511      Instruction *Mul = CreateMul(TheOp, C, "factor", I->getIterator(), I);
1512
1513      // Now that we have inserted a multiply, optimize it. This allows us to
1514      // handle cases that require multiple factoring steps, such as this:
1515      // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1516      RedoInsts.insert(Mul);
1517
1518      // If every add operand was a duplicate, return the multiply.
1519      if (Ops.empty())
1520        return Mul;
1521
1522      // Otherwise, we had some input that didn't have the dupe, such as
1523      // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1524      // things being added by this operation.
1525      Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1526
      // Back up so the freshly inserted multiply is itself re-examined.
1527      --i;
1528      e = Ops.size();
1529      continue;
1530    }
1531
1532    // Check for X and -X or X and ~X in the operand list.
1533    Value *X;
1534    if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1535        !match(TheOp, m_FNeg(m_Value(X))))
1536      continue;
1537
1538    unsigned FoundX = FindInOperandList(Ops, i, X);
1539    if (FoundX == i)
1540      continue;
1541
1542    // Remove X and -X from the operand list.
1543    if (Ops.size() == 2 &&
1544        (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1545      return Constant::getNullValue(X->getType());
1546
1547    // Remove X and ~X from the operand list.
1548    if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1549      return Constant::getAllOnesValue(X->getType());
1550
    // Erase the pair, adjusting whichever index sits after the first erase.
1551    Ops.erase(Ops.begin()+i);
1552    if (i < FoundX)
1553      --FoundX;
1554    else
1555      --i;   // Need to back up an extra one.
1556    Ops.erase(Ops.begin()+FoundX);
1557    ++NumAnnihil;
1558    --i;     // Revisit element.
1559    e -= 2;  // Removed two elements.
1560
1561    // if X and ~X we append -1 to the operand list.
1562    if (match(TheOp, m_Not(m_Value()))) {
1563      Value *V = Constant::getAllOnesValue(X->getType());
1564      Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1565      e += 1;
1566    }
1567  }
1568
1569  // Scan the operand list, checking to see if there are any common factors
1570  // between operands. Consider something like A*A+A*B*C+D. We would like to
1571  // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1572  // To efficiently find this, we count the number of times a factor occurs
1573  // for any ADD operands that are MULs.
1574  DenseMap<Value*, unsigned> FactorOccurrences;
1575
1576  // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1577  // where they are actually the same multiply.
1578  unsigned MaxOcc = 0;
1579  Value *MaxOccVal = nullptr;
1580  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1581    BinaryOperator *BOp =
1582        isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1583    if (!BOp)
1584      continue;
1585
1586    // Compute all of the factors of this added value.
1587    SmallVector<Value*, 8> Factors;
1588    FindSingleUseMultiplyFactors(BOp, Factors);
1589    assert(Factors.size() > 1 && "Bad linearize!");
1590
1591    // Add one to FactorOccurrences for each unique factor in this op.
1592    SmallPtrSet<Value*, 8> Duplicates;
1593    for (Value *Factor : Factors) {
1594      if (!Duplicates.insert(Factor).second)
1595        continue;
1596
1597      unsigned Occ = ++FactorOccurrences[Factor];
1598      if (Occ > MaxOcc) {
1599        MaxOcc = Occ;
1600        MaxOccVal = Factor;
1601      }
1602
1603      // If Factor is a negative constant, add the negated value as a factor
1604      // because we can percolate the negate out. Watch for minint, which
1605      // cannot be positivified.
1606      if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1607        if (CI->isNegative() && !CI->isMinValue(true)) {
1608          Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1609          if (!Duplicates.insert(Factor).second)
1610            continue;
1611          unsigned Occ = ++FactorOccurrences[Factor];
1612          if (Occ > MaxOcc) {
1613            MaxOcc = Occ;
1614            MaxOccVal = Factor;
1615          }
1616        }
1617      } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1618        if (CF->isNegative()) {
1619          APFloat F(CF->getValueAPF());
1620          F.changeSign();
1621          Factor = ConstantFP::get(CF->getContext(), F);
1622          if (!Duplicates.insert(Factor).second)
1623            continue;
1624          unsigned Occ = ++FactorOccurrences[Factor];
1625          if (Occ > MaxOcc) {
1626            MaxOcc = Occ;
1627            MaxOccVal = Factor;
1628          }
1629        }
1630      }
1631    }
1632  }
1633
1634  // If any factor occurred more than one time, we can pull it out.
1635  if (MaxOcc > 1) {
1636    LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1637                      << '\n');
1638    ++NumFactor;
1639
1640    // Create a new instruction that uses the MaxOccVal twice. If we don't do
1641    // this, we could otherwise run into situations where removing a factor
1642    // from an expression will drop a use of maxocc, and this can cause
1643    // RemoveFactorFromExpression on successive values to behave differently.
1644    Instruction *DummyInst =
1645        I->getType()->isIntOrIntVectorTy()
1646            ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1647            : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1648
// NOTE(review): original line 1649 (presumably the declaration
// `SmallVector<Value*, 4> NewMulOps;`) was lost in extraction.
1650    for (unsigned i = 0; i != Ops.size(); ++i) {
1651      // Only try to remove factors from expressions we're allowed to.
1652      BinaryOperator *BOp =
1653          isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1654      if (!BOp)
1655        continue;
1656
1657      if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1658        // The factorized operand may occur several times. Convert them all in
1659        // one fell swoop.
1660        for (unsigned j = Ops.size(); j != i;) {
1661          --j;
1662          if (Ops[j].Op == Ops[i].Op) {
1663            NewMulOps.push_back(V);
1664            Ops.erase(Ops.begin()+j);
1665          }
1666        }
1667        --i;
1668      }
1669    }
1670
1671    // No need for extra uses anymore.
1672    DummyInst->deleteValue();
1673
1674    unsigned NumAddedValues = NewMulOps.size();
1675    Value *V = EmitAddTreeOfValues(I->getIterator(), NewMulOps);
1676
1677    // Now that we have inserted the add tree, optimize it. This allows us to
1678    // handle cases that require multiple factoring steps, such as this:
1679    // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1680    assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1681    (void)NumAddedValues;
1682    if (Instruction *VI = dyn_cast<Instruction>(V))
1683      RedoInsts.insert(VI);
1684
1685    // Create the multiply.
1686    Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I->getIterator(), I);
1687
1688    // Rerun associate on the multiply in case the inner expression turned into
1689    // a multiply. We want to make sure that we keep things in canonical form.
1690    RedoInsts.insert(V2);
1691
1692    // If every add operand included the factor (e.g. "A*B + A*C"), then the
1693    // entire result expression is just the multiply "A*(B+C)".
1694    if (Ops.empty())
1695      return V2;
1696
1697    // Otherwise, we had some input that didn't have the factor, such as
1698    // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1699    // things being added by this operation.
1700    Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1701  }
1702
1703  return nullptr;
1704}
1705
1706/// Build up a vector of value/power pairs factoring a product.
1707///
1708/// Given a series of multiplication operands, build a vector of factors and
1709/// the powers each is raised to when forming the final product. Sort them in
1710/// the order of descending power.
1711///
1712/// (x*x) -> [(x, 2)]
1713/// ((x*x)*x) -> [(x, 3)]
1714/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1715///
1716/// \returns Whether any factors have a power greater than one.
// NOTE(review): original line 1717 (the first signature line, presumably
// `static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,`) was
// lost in extraction.
1718                                   SmallVectorImpl<Factor> &Factors) {
1719  // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1720  // Compute the sum of powers of simplifiable factors.
  // First pass: Ops is sorted, so equal operands are adjacent; count runs.
1721  unsigned FactorPowerSum = 0;
1722  for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1723    Value *Op = Ops[Idx-1].Op;
1724
1725    // Count the number of occurrences of this value.
1726    unsigned Count = 1;
1727    for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1728      ++Count;
1729    // Track for simplification all factors which occur 2 or more times.
1730    if (Count > 1)
1731      FactorPowerSum += Count;
1732  }
1733
1734  // We can only simplify factors if the sum of the powers of our simplifiable
1735  // factors is 4 or higher. When that is the case, we will *always* have
1736  // a simplification. This is an important invariant to prevent cyclicly
1737  // trying to simplify already minimal formations.
1738  if (FactorPowerSum < 4)
1739    return false;
1740
1741  // Now gather the simplifiable factors, removing them from Ops.
1742  FactorPowerSum = 0;
1743  for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1744    Value *Op = Ops[Idx-1].Op;
1745
1746    // Count the number of occurrences of this value.
1747    unsigned Count = 1;
1748    for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1749      ++Count;
1750    if (Count == 1)
1751      continue;
1752    // Move an even number of occurrences to Factors.
    // Rounding down to an even count leaves any odd leftover occurrence in
    // Ops to participate in the residual product.
1753    Count &= ~1U;
1754    Idx -= Count;
1755    FactorPowerSum += Count;
1756    Factors.push_back(Factor(Op, Count));
1757    Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1758  }
1759
1760  // None of the adjustments above should have reduced the sum of factor powers
1761  // below our mininum of '4'.
1762  assert(FactorPowerSum >= 4);
1763
1764  llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
1765    return LHS.Power > RHS.Power;
1766  });
1767  return true;
1768}
1769
1770/// Build a tree of multiplies, computing the product of Ops.
1773 if (Ops.size() == 1)
1774 return Ops.back();
1775
1776 Value *LHS = Ops.pop_back_val();
1777 do {
1778 if (LHS->getType()->isIntOrIntVectorTy())
1779 LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1780 else
1781 LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1782 } while (!Ops.empty());
1783
1784 return LHS;
1785}
1786
1787/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1788///
1789/// Given a vector of values raised to various powers, where no two values are
1790/// equal and the powers are sorted in decreasing order, compute the minimal
1791/// DAG of multiplies to compute the final product, and return that product
1792/// value.
1793Value *
1794ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1795 SmallVectorImpl<Factor> &Factors) {
1796 assert(Factors[0].Power);
1797 SmallVector<Value *, 4> OuterProduct;
1798 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1799 Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1800 if (Factors[Idx].Power != Factors[LastIdx].Power) {
1801 LastIdx = Idx;
1802 continue;
1803 }
1804
1805 // We want to multiply across all the factors with the same power so that
1806 // we can raise them to that power as a single entity. Build a mini tree
1807 // for that.
1808 SmallVector<Value *, 4> InnerProduct;
1809 InnerProduct.push_back(Factors[LastIdx].Base);
1810 do {
1811 InnerProduct.push_back(Factors[Idx].Base);
1812 ++Idx;
1813 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1814
1815 // Reset the base value of the first factor to the new expression tree.
1816 // We'll remove all the factors with the same power in a second pass.
1817 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1818 if (Instruction *MI = dyn_cast<Instruction>(M))
1819 RedoInsts.insert(MI);
1820
1821 LastIdx = Idx;
1822 }
1823 // Unique factors with equal powers -- we've folded them into the first one's
1824 // base.
1825 Factors.erase(llvm::unique(Factors,
1826 [](const Factor &LHS, const Factor &RHS) {
1827 return LHS.Power == RHS.Power;
1828 }),
1829 Factors.end());
1830
1831 // Iteratively collect the base of each factor with an add power into the
1832 // outer product, and halve each power in preparation for squaring the
1833 // expression.
1834 for (Factor &F : Factors) {
1835 if (F.Power & 1)
1836 OuterProduct.push_back(F.Base);
1837 F.Power >>= 1;
1838 }
1839 if (Factors[0].Power) {
1840 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1841 OuterProduct.push_back(SquareRoot);
1842 OuterProduct.push_back(SquareRoot);
1843 }
1844 if (OuterProduct.size() == 1)
1845 return OuterProduct.front();
1846
1847 Value *V = buildMultiplyTree(Builder, OuterProduct);
1848 return V;
1849}
1850
1851Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
1853 // We can only optimize the multiplies when there is a chain of more than
1854 // three, such that a balanced tree might require fewer total multiplies.
1855 if (Ops.size() < 4)
1856 return nullptr;
1857
1858 // Try to turn linear trees of multiplies without other uses of the
1859 // intermediate stages into minimal multiply DAGs with perfect sub-expression
1860 // re-use.
1861 SmallVector<Factor, 4> Factors;
1862 if (!collectMultiplyFactors(Ops, Factors))
1863 return nullptr; // All distinct factors, so nothing left for us to do.
1864
1865 IRBuilder<> Builder(I);
1866 // The reassociate transformation for FP operations is performed only
1867 // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1868 // to the newly generated operations.
1869 if (auto FPI = dyn_cast<FPMathOperator>(I))
1870 Builder.setFastMathFlags(FPI->getFastMathFlags());
1871
1872 Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1873 if (Ops.empty())
1874 return V;
1875
1876 ValueEntry NewEntry = ValueEntry(getRank(V), V);
1877 Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
1878 return nullptr;
1879}
1880
1881Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
1883 // Now that we have the linearized expression tree, try to optimize it.
1884 // Start by folding any constants that we found.
1885 const DataLayout &DL = I->getDataLayout();
1886 Constant *Cst = nullptr;
1887 unsigned Opcode = I->getOpcode();
1888 while (!Ops.empty()) {
1889 if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
1890 if (!Cst) {
1891 Ops.pop_back();
1892 Cst = C;
1893 continue;
1894 }
1895 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
1896 Ops.pop_back();
1897 Cst = Res;
1898 continue;
1899 }
1900 }
1901 break;
1902 }
1903 // If there was nothing but constants then we are done.
1904 if (Ops.empty())
1905 return Cst;
1906
1907 // Put the combined constant back at the end of the operand list, except if
1908 // there is no point. For example, an add of 0 gets dropped here, while a
1909 // multiplication by zero turns the whole expression into zero.
1910 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
1911 if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
1912 return Cst;
1913 Ops.push_back(ValueEntry(0, Cst));
1914 }
1915
1916 if (Ops.size() == 1) return Ops[0].Op;
1917
1918 // Handle destructive annihilation due to identities between elements in the
1919 // argument list here.
1920 unsigned NumOps = Ops.size();
1921 switch (Opcode) {
1922 default: break;
1923 case Instruction::And:
1924 case Instruction::Or:
1925 if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
1926 return Result;
1927 break;
1928
1929 case Instruction::Xor:
1930 if (Value *Result = OptimizeXor(I, Ops))
1931 return Result;
1932 break;
1933
1934 case Instruction::Add:
1935 case Instruction::FAdd:
1936 if (Value *Result = OptimizeAdd(I, Ops))
1937 return Result;
1938 break;
1939
1940 case Instruction::Mul:
1941 case Instruction::FMul:
1942 if (Value *Result = OptimizeMul(I, Ops))
1943 return Result;
1944 break;
1945 }
1946
1947 if (Ops.size() != NumOps)
1948 return OptimizeExpression(I, Ops);
1949 return nullptr;
1950}
1951
1952// Remove dead instructions and if any operands are trivially dead add them to
1953// Insts so they will be removed as well.
1954void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
1955 OrderedSet &Insts) {
1956 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
1957 SmallVector<Value *, 4> Ops(I->operands());
1958 ValueRankMap.erase(I);
1959 Insts.remove(I);
1960 RedoInsts.remove(I);
1962 I->eraseFromParent();
1963 for (auto *Op : Ops)
1964 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1965 if (OpInst->use_empty())
1966 Insts.insert(OpInst);
1967}
1968
1969/// Zap the given instruction, adding interesting operands to the work list.
1970void ReassociatePass::EraseInst(Instruction *I) {
1971 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
1972 LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
1973
1974 SmallVector<Value *, 8> Ops(I->operands());
1975 // Erase the dead instruction.
1976 ValueRankMap.erase(I);
1977 RedoInsts.remove(I);
1979 I->eraseFromParent();
1980 // Optimize its operands.
1981 SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
1982 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
1983 if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
1984 // If this is a node in an expression tree, climb to the expression root
1985 // and add that since that's where optimization actually happens.
1986 unsigned Opcode = Op->getOpcode();
1987 while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
1988 Visited.insert(Op).second)
1989 Op = Op->user_back();
1990
1991 // The instruction we're going to push may be coming from a
1992 // dead block, and Reassociate skips the processing of unreachable
1993 // blocks because it's a waste of time and also because it can
1994 // lead to infinite loop due to LLVM's non-standard definition
1995 // of dominance.
1996 if (ValueRankMap.contains(Op))
1997 RedoInsts.insert(Op);
1998 }
1999
2000 MadeChange = true;
2001}
2002
2003/// Recursively analyze an expression to build a list of instructions that have
2004/// negative floating-point constant operands. The caller can then transform
2005/// the list to create positive constants for better reassociation and CSE.
2007 SmallVectorImpl<Instruction *> &Candidates) {
2008 // Handle only one-use instructions. Combining negations does not justify
2009 // replicating instructions.
2010 Instruction *I;
2011 if (!match(V, m_OneUse(m_Instruction(I))))
2012 return;
2013
2014 // Handle expressions of multiplications and divisions.
2015 // TODO: This could look through floating-point casts.
2016 const APFloat *C;
2017 switch (I->getOpcode()) {
2018 case Instruction::FMul:
2019 // Not expecting non-canonical code here. Bail out and wait.
2020 if (match(I->getOperand(0), m_Constant()))
2021 break;
2022
2023 if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2024 Candidates.push_back(I);
2025 LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2026 }
2027 getNegatibleInsts(I->getOperand(0), Candidates);
2028 getNegatibleInsts(I->getOperand(1), Candidates);
2029 break;
2030 case Instruction::FDiv:
2031 // Not expecting non-canonical code here. Bail out and wait.
2032 if (match(I->getOperand(0), m_Constant()) &&
2033 match(I->getOperand(1), m_Constant()))
2034 break;
2035
2036 if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2037 (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2038 Candidates.push_back(I);
2039 LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2040 }
2041 getNegatibleInsts(I->getOperand(0), Candidates);
2042 getNegatibleInsts(I->getOperand(1), Candidates);
2043 break;
2044 default:
2045 break;
2046 }
2047}
2048
2049/// Given an fadd/fsub with an operand that is a one-use instruction
2050/// (the fadd/fsub), try to change negative floating-point constants into
2051/// positive constants to increase potential for reassociation and CSE.
2052Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
2053 Instruction *Op,
2054 Value *OtherOp) {
2055 assert((I->getOpcode() == Instruction::FAdd ||
2056 I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2057
2058 // Collect instructions with negative FP constants from the subtree that ends
2059 // in Op.
2061 getNegatibleInsts(Op, Candidates);
2062 if (Candidates.empty())
2063 return nullptr;
2064
2065 // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
2066 // resulting subtract will be broken up later. This can get us into an
2067 // infinite loop during reassociation.
2068 bool IsFSub = I->getOpcode() == Instruction::FSub;
2069 bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
2070 if (NeedsSubtract && ShouldBreakUpSubtract(I))
2071 return nullptr;
2072
2073 for (Instruction *Negatible : Candidates) {
2074 const APFloat *C;
2075 if (match(Negatible->getOperand(0), m_APFloat(C))) {
2076 assert(!match(Negatible->getOperand(1), m_Constant()) &&
2077 "Expecting only 1 constant operand");
2078 assert(C->isNegative() && "Expected negative FP constant");
2079 Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
2080 MadeChange = true;
2081 }
2082 if (match(Negatible->getOperand(1), m_APFloat(C))) {
2083 assert(!match(Negatible->getOperand(0), m_Constant()) &&
2084 "Expecting only 1 constant operand");
2085 assert(C->isNegative() && "Expected negative FP constant");
2086 Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
2087 MadeChange = true;
2088 }
2089 }
2090 assert(MadeChange == true && "Negative constant candidate was not changed");
2091
2092 // Negations cancelled out.
2093 if (Candidates.size() % 2 == 0)
2094 return I;
2095
2096 // Negate the final operand in the expression by flipping the opcode of this
2097 // fadd/fsub.
2098 assert(Candidates.size() % 2 == 1 && "Expected odd number");
2099 IRBuilder<> Builder(I);
2100 Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
2101 : Builder.CreateFSubFMF(OtherOp, Op, I);
2102 I->replaceAllUsesWith(NewInst);
2103 RedoInsts.insert(I);
2104 return dyn_cast<Instruction>(NewInst);
2105}
2106
2107/// Canonicalize expressions that contain a negative floating-point constant
2108/// of the following form:
2109/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2110/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2111/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2112///
2113/// The fadd/fsub opcode may be switched to allow folding a negation into the
2114/// input instruction.
2115Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
2116 LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
2117 Value *X;
2118 Instruction *Op;
2120 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2121 I = R;
2123 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2124 I = R;
2126 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2127 I = R;
2128 return I;
2129}
2130
2131/// Inspect and optimize the given instruction. Note that erasing
2132/// instructions is not allowed.
2133void ReassociatePass::OptimizeInst(Instruction *I) {
2134 // Only consider operations that we understand.
2135 if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
2136 return;
2137
2138 if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
2139 // If an operand of this shift is a reassociable multiply, or if the shift
2140 // is used by a reassociable multiply or add, turn into a multiply.
2141 if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
2142 (I->hasOneUse() &&
2143 (isReassociableOp(I->user_back(), Instruction::Mul) ||
2144 isReassociableOp(I->user_back(), Instruction::Add)))) {
2146 RedoInsts.insert(I);
2147 MadeChange = true;
2148 I = NI;
2149 }
2150
2151 // Commute binary operators, to canonicalize the order of their operands.
2152 // This can potentially expose more CSE opportunities, and makes writing other
2153 // transformations simpler.
2154 if (I->isCommutative())
2155 canonicalizeOperands(I);
2156
2157 // Canonicalize negative constants out of expressions.
2158 if (Instruction *Res = canonicalizeNegFPConstants(I))
2159 I = Res;
2160
2161 // Don't optimize floating-point instructions unless they have the
2162 // appropriate FastMathFlags for reassociation enabled.
2163 if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
2164 return;
2165
2166 // Do not reassociate boolean (i1) expressions. We want to preserve the
2167 // original order of evaluation for short-circuited comparisons that
2168 // SimplifyCFG has folded to AND/OR expressions. If the expression
2169 // is not further optimized, it is likely to be transformed back to a
2170 // short-circuited form for code gen, and the source order may have been
2171 // optimized for the most likely conditions.
2172 if (I->getType()->isIntegerTy(1))
2173 return;
2174
2175 // If this is a bitwise or instruction of operands
2176 // with no common bits set, convert it to X+Y.
2177 if (I->getOpcode() == Instruction::Or &&
2179 (cast<PossiblyDisjointInst>(I)->isDisjoint() ||
2180 haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
2181 SimplifyQuery(I->getDataLayout(),
2182 /*DT=*/nullptr, /*AC=*/nullptr, I)))) {
2184 RedoInsts.insert(I);
2185 MadeChange = true;
2186 I = NI;
2187 }
2188
2189 // If this is a subtract instruction which is not already in negate form,
2190 // see if we can convert it to X+-Y.
2191 if (I->getOpcode() == Instruction::Sub) {
2192 if (ShouldBreakUpSubtract(I)) {
2193 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2194 RedoInsts.insert(I);
2195 MadeChange = true;
2196 I = NI;
2197 } else if (match(I, m_Neg(m_Value()))) {
2198 // Otherwise, this is a negation. See if the operand is a multiply tree
2199 // and if this is not an inner node of a multiply tree.
2200 if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2201 (!I->hasOneUse() ||
2202 !isReassociableOp(I->user_back(), Instruction::Mul))) {
2204 // If the negate was simplified, revisit the users to see if we can
2205 // reassociate further.
2206 for (User *U : NI->users()) {
2207 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2208 RedoInsts.insert(Tmp);
2209 }
2210 RedoInsts.insert(I);
2211 MadeChange = true;
2212 I = NI;
2213 }
2214 }
2215 } else if (I->getOpcode() == Instruction::FNeg ||
2216 I->getOpcode() == Instruction::FSub) {
2217 if (ShouldBreakUpSubtract(I)) {
2218 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2219 RedoInsts.insert(I);
2220 MadeChange = true;
2221 I = NI;
2222 } else if (match(I, m_FNeg(m_Value()))) {
2223 // Otherwise, this is a negation. See if the operand is a multiply tree
2224 // and if this is not an inner node of a multiply tree.
2225 Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
2226 I->getOperand(0);
2227 if (isReassociableOp(Op, Instruction::FMul) &&
2228 (!I->hasOneUse() ||
2229 !isReassociableOp(I->user_back(), Instruction::FMul))) {
2230 // If the negate was simplified, revisit the users to see if we can
2231 // reassociate further.
2233 for (User *U : NI->users()) {
2234 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2235 RedoInsts.insert(Tmp);
2236 }
2237 RedoInsts.insert(I);
2238 MadeChange = true;
2239 I = NI;
2240 }
2241 }
2242 }
2243
2244 // If this instruction is an associative binary operator, process it.
2245 if (!I->isAssociative()) return;
2246 BinaryOperator *BO = cast<BinaryOperator>(I);
2247
2248 // If this is an interior node of a reassociable tree, ignore it until we
2249 // get to the root of the tree, to avoid N^2 analysis.
2250 unsigned Opcode = BO->getOpcode();
2251 if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2252 // During the initial run we will get to the root of the tree.
2253 // But if we get here while we are redoing instructions, there is no
2254 // guarantee that the root will be visited. So Redo later
2255 if (BO->user_back() != BO &&
2256 BO->getParent() == BO->user_back()->getParent())
2257 RedoInsts.insert(BO->user_back());
2258 return;
2259 }
2260
2261 // If this is an add tree that is used by a sub instruction, ignore it
2262 // until we process the subtract.
2263 if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2264 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2265 return;
2266 if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2267 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2268 return;
2269
2270 ReassociateExpression(BO);
2271}
2272
2273void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2274 // First, walk the expression tree, linearizing the tree, collecting the
2275 // operand information.
2278 MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, Flags);
2280 Ops.reserve(Tree.size());
2281 for (const RepeatedValue &E : Tree)
2282 Ops.append(E.second, ValueEntry(getRank(E.first), E.first));
2283
2284 LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2285
2286 // Now that we have linearized the tree to a list and have gathered all of
2287 // the operands and their ranks, sort the operands by their rank. Use a
2288 // stable_sort so that values with equal ranks will have their relative
2289 // positions maintained (and so the compiler is deterministic). Note that
2290 // this sorts so that the highest ranking values end up at the beginning of
2291 // the vector.
2292 llvm::stable_sort(Ops);
2293
2294 // Now that we have the expression tree in a convenient
2295 // sorted form, optimize it globally if possible.
2296 if (Value *V = OptimizeExpression(I, Ops)) {
2297 if (V == I)
2298 // Self-referential expression in unreachable code.
2299 return;
2300 // This expression tree simplified to something that isn't a tree,
2301 // eliminate it.
2302 LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
2303 I->replaceAllUsesWith(V);
2304 if (Instruction *VI = dyn_cast<Instruction>(V))
2305 if (I->getDebugLoc())
2306 VI->setDebugLoc(I->getDebugLoc());
2307 RedoInsts.insert(I);
2308 ++NumAnnihil;
2309 return;
2310 }
2311
2312 // We want to sink immediates as deeply as possible except in the case where
2313 // this is a multiply tree used only by an add, and the immediate is a -1.
2314 // In this case we reassociate to put the negation on the outside so that we
2315 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2316 if (I->hasOneUse()) {
2317 if (I->getOpcode() == Instruction::Mul &&
2318 cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2319 isa<ConstantInt>(Ops.back().Op) &&
2320 cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
2321 ValueEntry Tmp = Ops.pop_back_val();
2322 Ops.insert(Ops.begin(), Tmp);
2323 } else if (I->getOpcode() == Instruction::FMul &&
2324 cast<Instruction>(I->user_back())->getOpcode() ==
2325 Instruction::FAdd &&
2326 isa<ConstantFP>(Ops.back().Op) &&
2327 cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
2328 ValueEntry Tmp = Ops.pop_back_val();
2329 Ops.insert(Ops.begin(), Tmp);
2330 }
2331 }
2332
2333 LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2334
2335 if (Ops.size() == 1) {
2336 if (Ops[0].Op == I)
2337 // Self-referential expression in unreachable code.
2338 return;
2339
2340 // This expression tree simplified to something that isn't a tree,
2341 // eliminate it.
2342 I->replaceAllUsesWith(Ops[0].Op);
2343 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2344 OI->setDebugLoc(I->getDebugLoc());
2345 RedoInsts.insert(I);
2346 return;
2347 }
2348
2349 if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2350 // Find the pair with the highest count in the pairmap and move it to the
2351 // back of the list so that it can later be CSE'd.
2352 // example:
2353 // a*b*c*d*e
2354 // if c*e is the most "popular" pair, we can express this as
2355 // (((c*e)*d)*b)*a
2356 unsigned Max = 1;
2357 unsigned BestRank = 0;
2358 std::pair<unsigned, unsigned> BestPair;
2359 unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2360 unsigned LimitIdx = 0;
2361 // With the CSE-driven heuristic, we are about to slap two values at the
2362 // beginning of the expression whereas they could live very late in the CFG.
2363 // When using the CSE-local heuristic we avoid creating dependences from
2364 // completely unrelated part of the CFG by limiting the expression
2365 // reordering on the values that live in the first seen basic block.
2366 // The main idea is that we want to avoid forming expressions that would
2367 // become loop dependent.
2368 if (UseCSELocalOpt) {
2369 const BasicBlock *FirstSeenBB = nullptr;
2370 int StartIdx = Ops.size() - 1;
2371 // Skip the first value of the expression since we need at least two
2372 // values to materialize an expression. I.e., even if this value is
2373 // anchored in a different basic block, the actual first sub expression
2374 // will be anchored on the second value.
2375 for (int i = StartIdx - 1; i != -1; --i) {
2376 const Value *Val = Ops[i].Op;
2377 const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
2378 const BasicBlock *SeenBB = nullptr;
2379 if (!CurrLeafInstr) {
2380 // The value is free of any CFG dependencies.
2381 // Do as if it lives in the entry block.
2382 //
2383 // We do this to make sure all the values falling on this path are
2384 // seen through the same anchor point. The rationale is these values
2385 // can be combined together to from a sub expression free of any CFG
2386 // dependencies so we want them to stay together.
2387 // We could be cleverer and postpone the anchor down to the first
2388 // anchored value, but that's likely complicated to get right.
2389 // E.g., we wouldn't want to do that if that means being stuck in a
2390 // loop.
2391 //
2392 // For instance, we wouldn't want to change:
2393 // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
2394 // into
2395 // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
2396 // Because all the sub expressions with arg2..N would be stuck between
2397 // two loop dependent values.
2398 SeenBB = &I->getParent()->getParent()->getEntryBlock();
2399 } else {
2400 SeenBB = CurrLeafInstr->getParent();
2401 }
2402
2403 if (!FirstSeenBB) {
2404 FirstSeenBB = SeenBB;
2405 continue;
2406 }
2407 if (FirstSeenBB != SeenBB) {
2408 // ith value is in a different basic block.
2409 // Rewind the index once to point to the last value on the same basic
2410 // block.
2411 LimitIdx = i + 1;
2412 LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
2413 << LimitIdx << ", " << StartIdx << "]\n");
2414 break;
2415 }
2416 }
2417 }
2418 for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
2419 // We must use int type to go below zero when LimitIdx is 0.
2420 for (int j = i - 1; j >= (int)LimitIdx; --j) {
2421 unsigned Score = 0;
2422 Value *Op0 = Ops[i].Op;
2423 Value *Op1 = Ops[j].Op;
2424 if (std::less<Value *>()(Op1, Op0))
2425 std::swap(Op0, Op1);
2426 auto it = PairMap[Idx].find({Op0, Op1});
2427 if (it != PairMap[Idx].end()) {
2428 // Functions like BreakUpSubtract() can erase the Values we're using
2429 // as keys and create new Values after we built the PairMap. There's a
2430 // small chance that the new nodes can have the same address as
2431 // something already in the table. We shouldn't accumulate the stored
2432 // score in that case as it refers to the wrong Value.
2433 if (it->second.isValid())
2434 Score += it->second.Score;
2435 }
2436
2437 unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
2438
2439 // By construction, the operands are sorted in reverse order of their
2440 // topological order.
2441 // So we tend to form (sub) expressions with values that are close to
2442 // each other.
2443 //
2444 // Now to expose more CSE opportunities we want to expose the pair of
2445 // operands that occur the most (as statically computed in
2446 // BuildPairMap.) as the first sub-expression.
2447 //
2448 // If two pairs occur as many times, we pick the one with the
2449 // lowest rank, meaning the one with both operands appearing first in
2450 // the topological order.
2451 if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2452 BestPair = {j, i};
2453 Max = Score;
2454 BestRank = MaxRank;
2455 }
2456 }
2457 }
2458 if (Max > 1) {
2459 auto Op0 = Ops[BestPair.first];
2460 auto Op1 = Ops[BestPair.second];
2461 Ops.erase(&Ops[BestPair.second]);
2462 Ops.erase(&Ops[BestPair.first]);
2463 Ops.push_back(Op0);
2464 Ops.push_back(Op1);
2465 }
2466 }
2467 LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
2468 dbgs() << '\n');
2469 // Now that we ordered and optimized the expressions, splat them back into
2470 // the expression tree, removing any unneeded nodes.
2471 RewriteExprTree(I, Ops, Flags);
2472}
2473
2474void
2475ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2476 // Make a "pairmap" of how often each operand pair occurs.
2477 for (BasicBlock *BI : RPOT) {
2478 for (Instruction &I : *BI) {
2479 if (!I.isAssociative() || !I.isBinaryOp())
2480 continue;
2481
2482 // Ignore nodes that aren't at the root of trees.
2483 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2484 continue;
2485
2486 // Collect all operands in a single reassociable expression.
2487 // Since Reassociate has already been run once, we can assume things
2488 // are already canonical according to Reassociation's regime.
2489 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
2491 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2492 Value *Op = Worklist.pop_back_val();
2493 Instruction *OpI = dyn_cast<Instruction>(Op);
2494 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2495 Ops.push_back(Op);
2496 continue;
2497 }
2498 // Be paranoid about self-referencing expressions in unreachable code.
2499 if (OpI->getOperand(0) != OpI)
2500 Worklist.push_back(OpI->getOperand(0));
2501 if (OpI->getOperand(1) != OpI)
2502 Worklist.push_back(OpI->getOperand(1));
2503 }
2504 // Skip extremely long expressions.
2505 if (Ops.size() > GlobalReassociateLimit)
2506 continue;
2507
2508 // Add all pairwise combinations of operands to the pair map.
2509 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2511 for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2512 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2513 // Canonicalize operand orderings.
2514 Value *Op0 = Ops[i];
2515 Value *Op1 = Ops[j];
2516 if (std::less<Value *>()(Op1, Op0))
2517 std::swap(Op0, Op1);
2518 if (!Visited.insert({Op0, Op1}).second)
2519 continue;
2520 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2521 if (!res.second) {
2522 // If either key value has been erased then we've got the same
2523 // address by coincidence. That can't happen here because nothing is
2524 // erasing values but it can happen by the time we're querying the
2525 // map.
2526 assert(res.first->second.isValid() && "WeakVH invalidated");
2527 ++res.first->second.Score;
2528 }
2529 }
2530 }
2531 }
2532 }
2533}
2534
2536 // Get the functions basic blocks in Reverse Post Order. This order is used by
2537 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2538 // blocks (it has been seen that the analysis in this pass could hang when
2539 // analysing dead basic blocks).
2541
2542 // Calculate the rank map for F.
2543 BuildRankMap(F, RPOT);
2544
2545 // Build the pair map before running reassociate.
2546 // Technically this would be more accurate if we did it after one round
2547 // of reassociation, but in practice it doesn't seem to help much on
2548 // real-world code, so don't waste the compile time running reassociate
2549 // twice.
2550 // If a user wants, they could expicitly run reassociate twice in their
2551 // pass pipeline for further potential gains.
2552 // It might also be possible to update the pair map during runtime, but the
2553 // overhead of that may be large if there's many reassociable chains.
2554 BuildPairMap(RPOT);
2555
2556 MadeChange = false;
2557
2558 // Traverse the same blocks that were analysed by BuildRankMap.
2559 for (BasicBlock *BI : RPOT) {
2560 assert(RankMap.count(&*BI) && "BB should be ranked.");
2561 // Optimize every instruction in the basic block.
2562 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2564 EraseInst(&*II++);
2565 } else {
2566 OptimizeInst(&*II);
2567 assert(II->getParent() == &*BI && "Moved to a different block!");
2568 ++II;
2569 }
2570
2571 // Make a copy of all the instructions to be redone so we can remove dead
2572 // instructions.
2573 OrderedSet ToRedo(RedoInsts);
2574 // Iterate over all instructions to be reevaluated and remove trivially dead
2575 // instructions. If any operand of the trivially dead instruction becomes
2576 // dead mark it for deletion as well. Continue this process until all
2577 // trivially dead instructions have been removed.
2578 while (!ToRedo.empty()) {
2579 Instruction *I = ToRedo.pop_back_val();
2581 RecursivelyEraseDeadInsts(I, ToRedo);
2582 MadeChange = true;
2583 }
2584 }
2585
2586 // Now that we have removed dead instructions, we can reoptimize the
2587 // remaining instructions.
2588 while (!RedoInsts.empty()) {
2589 Instruction *I = RedoInsts.front();
2590 RedoInsts.erase(RedoInsts.begin());
2592 EraseInst(I);
2593 else
2594 OptimizeInst(I);
2595 }
2596 }
2597
2598 // We are done with the rank map and pair map.
2599 RankMap.clear();
2600 ValueRankMap.clear();
2601 for (auto &Entry : PairMap)
2602 Entry.clear();
2603
2604 if (MadeChange) {
2607 return PA;
2608 }
2609
2610 return PreservedAnalyses::all();
2611}
2612
2613namespace {
2614
2615 class ReassociateLegacyPass : public FunctionPass {
2616 ReassociatePass Impl;
2617
2618 public:
2619 static char ID; // Pass identification, replacement for typeid
2620
2621 ReassociateLegacyPass() : FunctionPass(ID) {
2623 }
2624
2625 bool runOnFunction(Function &F) override {
2626 if (skipFunction(F))
2627 return false;
2628
2629 FunctionAnalysisManager DummyFAM;
2630 auto PA = Impl.run(F, DummyFAM);
2631 return !PA.areAllPreserved();
2632 }
2633
2634 void getAnalysisUsage(AnalysisUsage &AU) const override {
2635 AU.setPreservesCFG();
2639 }
2640 };
2641
2642} // end anonymous namespace
2643
2644char ReassociateLegacyPass::ID = 0;
2645
2646INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2647 "Reassociate expressions", false, false)
2648
2649// Public interface to the Reassociate pass
2651 return new ReassociateLegacyPass();
2652}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This is the interface for LLVM's primary stateless and local alias analysis.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the subtype a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition: IVUsers.cpp:56
static bool isReassociableOp(Instruction *I, unsigned IntOpcode, unsigned FPOpcode)
Definition: LICM.cpp:2676
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
nary reassociate
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:82
static Value * EmitAddTreeOfValues(BasicBlock::iterator It, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
std::pair< Value *, uint64_t > RepeatedValue
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops, ReassociatePass::OrderedSet &ToRedo, reassociate::OverflowTracking &Flags)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
static cl::opt< bool > UseCSELocalOpt(DEBUG_TYPE "-use-cse-local", cl::desc("Only reorder expressions within a basic block " "when exposing CSE opportunities"), cl::init(true), cl::Hidden)
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
static Instruction * CreateNeg(Value *S1, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static bool hasFPAssociativeFlags(Instruction *I)
Return true if I is an instruction with the FastMathFlags that are needed for general reassociation s...
static Value * createAndInstr(BasicBlock::iterator InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
#define DEBUG_TYPE
Definition: Reassociate.cpp:68
static bool isLoadCombineCandidate(Instruction *Or)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:77
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:350
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:450
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:179
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:384
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:167
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition: InstrTypes.h:442
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
static Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2686
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2615
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2549
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const BasicBlock & getEntryBlock() const
Definition: Function.h:800
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:92
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1545
Value * CreateFSubFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1572
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:309
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1585
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1359
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void dropLocation()
Drop the instruction's debug location.
Definition: DebugInfo.cpp:967
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:476
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
const char * getOpcodeName() const
Definition: Instruction.h:276
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:473
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1814
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: Analysis.h:281
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
Reassociate commutative expressions.
Definition: Reassociate.h:82
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition: InstrTypes.h:164
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:544
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:110
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
Utility class representing a non-constant Xor-operand.
Value * getSymbolicPart() const
unsigned getSymbolicRank() const
void setSymbolicRank(unsigned R)
const APInt & getConstPart() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:816
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:316
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1665
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1434
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2013
FunctionPass * createReassociatePass()
void initializeReassociateLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:617
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1954
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:60