LLVM 20.0.0git
Reassociate.cpp
Go to the documentation of this file.
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass reassociates commutative expressions in an order that is designed
10// to promote better constant propagation, GCSE, LICM, PRE, etc.
11//
12// For example: 4 + (x + 5) -> x + (4 + 5)
13//
14// In the implementation of this algorithm, constants are assigned rank = 0,
15// function arguments are rank = 1, and other values are assigned ranks
16// corresponding to the reverse post order traversal of current function
17// (starting at 2), which effectively gives values in deep loops higher rank
18// than values not in loops.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Operator.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/IR/ValueHandle.h"
53#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <utility>
63
64using namespace llvm;
65using namespace reassociate;
66using namespace PatternMatch;
67
68#define DEBUG_TYPE "reassociate"
69
70STATISTIC(NumChanged, "Number of insts reassociated");
71STATISTIC(NumAnnihil, "Number of expr tree annihilated");
72STATISTIC(NumFactor , "Number of multiplies factored");
73
74static cl::opt<bool>
75 UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
76 cl::desc("Only reorder expressions within a basic block "
77 "when exposing CSE opportunities"),
78 cl::init(true), cl::Hidden);
79
80#ifndef NDEBUG
81/// Print out the expression identified in the Ops list.
83 Module *M = I->getModule();
84 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
85 << *Ops[0].Op->getType() << '\t';
86 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
87 dbgs() << "[ ";
88 Ops[i].Op->printAsOperand(dbgs(), false, M);
89 dbgs() << ", #" << Ops[i].Rank << "] ";
90 }
91}
92#endif
93
94/// Utility class representing a non-constant Xor-operand. We classify
95/// non-constant Xor-Operands into two categories:
96/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
97/// C2)
98/// C2.1) The operand is in the form of "X | C", where C is a non-zero
99/// constant.
100/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
101/// operand as "E | 0"
103public:
104 XorOpnd(Value *V);
105
106 bool isInvalid() const { return SymbolicPart == nullptr; }
107 bool isOrExpr() const { return isOr; }
108 Value *getValue() const { return OrigVal; }
109 Value *getSymbolicPart() const { return SymbolicPart; }
110 unsigned getSymbolicRank() const { return SymbolicRank; }
111 const APInt &getConstPart() const { return ConstPart; }
112
113 void Invalidate() { SymbolicPart = OrigVal = nullptr; }
114 void setSymbolicRank(unsigned R) { SymbolicRank = R; }
115
116private:
117 Value *OrigVal;
118 Value *SymbolicPart;
119 APInt ConstPart;
120 unsigned SymbolicRank;
121 bool isOr;
122};
123
125 assert(!isa<ConstantInt>(V) && "No ConstantInt");
126 OrigVal = V;
127 Instruction *I = dyn_cast<Instruction>(V);
128 SymbolicRank = 0;
129
130 if (I && (I->getOpcode() == Instruction::Or ||
131 I->getOpcode() == Instruction::And)) {
132 Value *V0 = I->getOperand(0);
133 Value *V1 = I->getOperand(1);
134 const APInt *C;
135 if (match(V0, m_APInt(C)))
136 std::swap(V0, V1);
137
138 if (match(V1, m_APInt(C))) {
139 ConstPart = *C;
140 SymbolicPart = V0;
141 isOr = (I->getOpcode() == Instruction::Or);
142 return;
143 }
144 }
145
146 // view the operand as "V | 0"
147 SymbolicPart = V;
148 ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
149 isOr = true;
150}
151
152/// Return true if I is an instruction with the FastMathFlags that are needed
153/// for general reassociation set. This is not the same as testing
154/// Instruction::isAssociative() because it includes operations like fsub.
155/// (This routine is only intended to be called for floating-point operations.)
157 assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
158 return I->hasAllowReassoc() && I->hasNoSignedZeros();
159}
160
161/// Return true if V is an instruction of the specified opcode and if it
162/// only has one use.
163static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
164 auto *BO = dyn_cast<BinaryOperator>(V);
165 if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
166 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
167 return BO;
168 return nullptr;
169}
170
171static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
172 unsigned Opcode2) {
173 auto *BO = dyn_cast<BinaryOperator>(V);
174 if (BO && BO->hasOneUse() &&
175 (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
176 if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
177 return BO;
178 return nullptr;
179}
180
181void ReassociatePass::BuildRankMap(Function &F,
183 unsigned Rank = 2;
184
185 // Assign distinct ranks to function arguments.
186 for (auto &Arg : F.args()) {
187 ValueRankMap[&Arg] = ++Rank;
188 LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
189 << "\n");
190 }
191
192 // Traverse basic blocks in ReversePostOrder.
193 for (BasicBlock *BB : RPOT) {
194 unsigned BBRank = RankMap[BB] = ++Rank << 16;
195
196 // Walk the basic block, adding precomputed ranks for any instructions that
197 // we cannot move. This ensures that the ranks for these instructions are
198 // all different in the block.
199 for (Instruction &I : *BB)
201 ValueRankMap[&I] = ++BBRank;
202 }
203}
204
205unsigned ReassociatePass::getRank(Value *V) {
206 Instruction *I = dyn_cast<Instruction>(V);
207 if (!I) {
208 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
209 return 0; // Otherwise it's a global or constant, rank 0.
210 }
211
212 if (unsigned Rank = ValueRankMap[I])
213 return Rank; // Rank already known?
214
215 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
216 // we can reassociate expressions for code motion! Since we do not recurse
217 // for PHI nodes, we cannot have infinite recursion here, because there
218 // cannot be loops in the value graph that do not go through PHI nodes.
219 unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
220 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
221 Rank = std::max(Rank, getRank(I->getOperand(i)));
222
223 // If this is a 'not' or 'neg' instruction, do not count it for rank. This
224 // assures us that X and ~X will have the same rank.
225 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
226 !match(I, m_FNeg(m_Value())))
227 ++Rank;
228
229 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
230 << "\n");
231
232 return ValueRankMap[I] = Rank;
233}
234
235// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
236void ReassociatePass::canonicalizeOperands(Instruction *I) {
237 assert(isa<BinaryOperator>(I) && "Expected binary operator.");
238 assert(I->isCommutative() && "Expected commutative operator.");
239
240 Value *LHS = I->getOperand(0);
241 Value *RHS = I->getOperand(1);
242 if (LHS == RHS || isa<Constant>(RHS))
243 return;
244 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
245 cast<BinaryOperator>(I)->swapOperands();
246}
247
249 BasicBlock::iterator InsertBefore,
250 Value *FlagsOp) {
251 if (S1->getType()->isIntOrIntVectorTy())
252 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
253 else {
254 BinaryOperator *Res =
255 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
256 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
257 return Res;
258 }
259}
260
262 BasicBlock::iterator InsertBefore,
263 Value *FlagsOp) {
264 if (S1->getType()->isIntOrIntVectorTy())
265 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
266 else {
267 BinaryOperator *Res =
268 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
269 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
270 return Res;
271 }
272}
273
275 BasicBlock::iterator InsertBefore,
276 Value *FlagsOp) {
277 if (S1->getType()->isIntOrIntVectorTy())
278 return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
279
280 if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
281 return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);
282
283 return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
284}
285
286/// Replace 0-X with X*-1.
288 assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
289 "Expected a Negate!");
290 // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
291 unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
292 Type *Ty = Neg->getType();
293 Constant *NegOne = Ty->isIntOrIntVectorTy() ?
294 ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
295
296 BinaryOperator *Res =
297 CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg->getIterator(), Neg);
298 Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
299 Res->takeName(Neg);
300 Neg->replaceAllUsesWith(Res);
301 Res->setDebugLoc(Neg->getDebugLoc());
302 return Res;
303}
304
305using RepeatedValue = std::pair<Value *, uint64_t>;
306
307/// Given an associative binary expression, return the leaf
308/// nodes in Ops along with their weights (how many times the leaf occurs). The
309/// original expression is the same as
310/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
311/// op
312/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
313/// op
314/// ...
315/// op
316/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
317///
318/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
319///
320/// This routine may modify the function, in which case it returns 'true'. The
321/// changes it makes may well be destructive, changing the value computed by 'I'
322/// to something completely different. Thus if the routine returns 'true' then
323/// you MUST either replace I with a new expression computed from the Ops array,
324/// or use RewriteExprTree to put the values back in.
325///
326/// A leaf node is either not a binary operation of the same kind as the root
327/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
328/// opcode), or is the same kind of binary operator but has a use which either
329/// does not belong to the expression, or does belong to the expression but is
330/// a leaf node. Every leaf node has at least one use that is a non-leaf node
331/// of the expression, while for non-leaf nodes (except for the root 'I') every
332/// use is a non-leaf node of the expression.
333///
334/// For example:
335/// expression graph node names
336///
337/// + | I
338/// / \ |
339/// + + | A, B
340/// / \ / \ |
341/// * + * | C, D, E
342/// / \ / \ / \ |
343/// + * | F, G
344///
345/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
346/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
347///
348/// The expression is maximal: if some instruction is a binary operator of the
349/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
350/// then the instruction also belongs to the expression, is not a leaf node of
351/// it, and its operands also belong to the expression (but may be leaf nodes).
352///
353/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
354/// order to ensure that every non-root node in the expression has *exactly one*
355/// use by a non-leaf node of the expression. This destruction means that the
356/// caller MUST either replace 'I' with a new expression or use something like
357/// RewriteExprTree to put the values back in if the routine indicates that it
358/// made a change by returning 'true'.
359///
360/// In the above example either the right operand of A or the left operand of B
361/// will be replaced by undef. If it is B's operand then this gives:
362///
363/// + | I
364/// / \ |
365/// + + | A, B - operand of B replaced with undef
366/// / \ \ |
367/// * + * | C, D, E
368/// / \ / \ / \ |
369/// + * | F, G
370///
371/// Note that such undef operands can only be reached by passing through 'I'.
372/// For example, if you visit operands recursively starting from a leaf node
373/// then you will never see such an undef operand unless you get back to 'I',
374/// which requires passing through a phi node.
375///
376/// Note that this routine may also mutate binary operators of the wrong type
377/// that have all uses inside the expression (i.e. only used by non-leaf nodes
378/// of the expression) if it can turn them into binary operators of the right
379/// type and thus make the expression bigger.
384 assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
385 "Expected a UnaryOperator or BinaryOperator!");
386 LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
387 unsigned Opcode = I->getOpcode();
388 assert(I->isAssociative() && I->isCommutative() &&
389 "Expected an associative and commutative operation!");
390
391 // Visit all operands of the expression, keeping track of their weight (the
392 // number of paths from the expression root to the operand, or if you like
393 // the number of times that operand occurs in the linearized expression).
394 // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
395 // while A has weight two.
396
397 // Worklist of non-leaf nodes (their operands are in the expression too) along
398 // with their weights, representing a certain number of paths to the operator.
399 // If an operator occurs in the worklist multiple times then we found multiple
400 // ways to get to it.
401 SmallVector<std::pair<Instruction *, uint64_t>, 8> Worklist; // (Op, Weight)
402 Worklist.push_back(std::make_pair(I, 1));
403 bool Changed = false;
404
405 // Leaves of the expression are values that either aren't the right kind of
406 // operation (eg: a constant, or a multiply in an add tree), or are, but have
407 // some uses that are not inside the expression. For example, in I = X + X,
408 // X = A + B, the value X has two uses (by I) that are in the expression. If
409 // X has any other uses, for example in a return instruction, then we consider
410 // X to be a leaf, and won't analyze it further. When we first visit a value,
411 // if it has more than one use then at first we conservatively consider it to
412 // be a leaf. Later, as the expression is explored, we may discover some more
413 // uses of the value from inside the expression. If all uses turn out to be
414 // from within the expression (and the value is a binary operator of the right
415 // kind) then the value is no longer considered to be a leaf, and its operands
416 // are explored.
417
418 // Leaves - Keeps track of the set of putative leaves as well as the number of
419 // paths to each leaf seen so far.
420 using LeafMap = DenseMap<Value *, uint64_t>;
421 LeafMap Leaves; // Leaf -> Total weight so far.
422 SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
423 const DataLayout DL = I->getDataLayout();
424
425#ifndef NDEBUG
426 SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
427#endif
428 while (!Worklist.empty()) {
429 // We examine the operands of this binary operator.
430 auto [I, Weight] = Worklist.pop_back_val();
431
432 if (isa<OverflowingBinaryOperator>(I)) {
433 Flags.HasNUW &= I->hasNoUnsignedWrap();
434 Flags.HasNSW &= I->hasNoSignedWrap();
435 }
436
437 for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
438 Value *Op = I->getOperand(OpIdx);
439 LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
440 assert(!Op->use_empty() && "No uses, so how did we get to it?!");
441
442 // If this is a binary operation of the right kind with only one use then
443 // add its operands to the expression.
444 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
445 assert(Visited.insert(Op).second && "Not first visit!");
446 LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
447 Worklist.push_back(std::make_pair(BO, Weight));
448 continue;
449 }
450
451 // Appears to be a leaf. Is the operand already in the set of leaves?
452 LeafMap::iterator It = Leaves.find(Op);
453 if (It == Leaves.end()) {
454 // Not in the leaf map. Must be the first time we saw this operand.
455 assert(Visited.insert(Op).second && "Not first visit!");
456 if (!Op->hasOneUse()) {
457 // This value has uses not accounted for by the expression, so it is
458 // not safe to modify. Mark it as being a leaf.
460 << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
461 LeafOrder.push_back(Op);
462 Leaves[Op] = Weight;
463 continue;
464 }
465 // No uses outside the expression, try morphing it.
466 } else {
467 // Already in the leaf map.
468 assert(It != Leaves.end() && Visited.count(Op) &&
469 "In leaf map but not visited!");
470
471 // Update the number of paths to the leaf.
472 It->second += Weight;
473 assert(It->second >= Weight && "Weight overflows");
474
475 // If we still have uses that are not accounted for by the expression
476 // then it is not safe to modify the value.
477 if (!Op->hasOneUse())
478 continue;
479
480 // No uses outside the expression, try morphing it.
481 Weight = It->second;
482 Leaves.erase(It); // Since the value may be morphed below.
483 }
484
485 // At this point we have a value which, first of all, is not a binary
486 // expression of the right kind, and secondly, is only used inside the
487 // expression. This means that it can safely be modified. See if we
488 // can usefully morph it into an expression of the right kind.
489 assert((!isa<Instruction>(Op) ||
490 cast<Instruction>(Op)->getOpcode() != Opcode
491 || (isa<FPMathOperator>(Op) &&
492 !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
493 "Should have been handled above!");
494 assert(Op->hasOneUse() && "Has uses outside the expression tree!");
495
496 // If this is a multiply expression, turn any internal negations into
497 // multiplies by -1 so they can be reassociated. Add any users of the
498 // newly created multiplication by -1 to the redo list, so any
499 // reassociation opportunities that are exposed will be reassociated
500 // further.
501 Instruction *Neg;
502 if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
503 (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
504 match(Op, m_Instruction(Neg))) {
506 << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
508 LLVM_DEBUG(dbgs() << *Mul << '\n');
509 Worklist.push_back(std::make_pair(Mul, Weight));
510 for (User *U : Mul->users()) {
511 if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
512 ToRedo.insert(UserBO);
513 }
514 ToRedo.insert(Neg);
515 Changed = true;
516 continue;
517 }
518
519 // Failed to morph into an expression of the right type. This really is
520 // a leaf.
521 LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
522 assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
523 LeafOrder.push_back(Op);
524 Leaves[Op] = Weight;
525 }
526 }
527
528 // The leaves, repeated according to their weights, represent the linearized
529 // form of the expression.
530 for (Value *V : LeafOrder) {
531 LeafMap::iterator It = Leaves.find(V);
532 if (It == Leaves.end())
533 // Node initially thought to be a leaf wasn't.
534 continue;
535 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
536 uint64_t Weight = It->second;
537 // Ensure the leaf is only output once.
538 It->second = 0;
539 Ops.push_back(std::make_pair(V, Weight));
540 if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW)
541 Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
542 else if (Opcode == Instruction::Mul) {
543 // To preserve NUW we need all inputs non-zero.
544 // To preserve NSW we need all inputs strictly positive.
545 if (Flags.AllKnownNonZero &&
546 (Flags.HasNUW || (Flags.HasNSW && Flags.AllKnownNonNegative))) {
547 Flags.AllKnownNonZero &= isKnownNonZero(V, SimplifyQuery(DL));
548 if (Flags.HasNSW && Flags.AllKnownNonNegative)
549 Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
550 }
551 }
552 }
553
554 // For nilpotent operations or addition there may be no operands, for example
555 // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
556 // in both cases the weight reduces to 0 causing the value to be skipped.
557 if (Ops.empty()) {
558 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
559 assert(Identity && "Associative operation without identity!");
560 Ops.emplace_back(Identity, 1);
561 }
562
563 return Changed;
564}
565
566/// Now that the operands for this expression tree are
567/// linearized and optimized, emit them in-order.
568void ReassociatePass::RewriteExprTree(BinaryOperator *I,
570 OverflowTracking Flags) {
571 assert(Ops.size() > 1 && "Single values should be used directly!");
572
573 // Since our optimizations should never increase the number of operations, the
574 // new expression can usually be written reusing the existing binary operators
575 // from the original expression tree, without creating any new instructions,
576 // though the rewritten expression may have a completely different topology.
577 // We take care to not change anything if the new expression will be the same
578 // as the original. If more than trivial changes (like commuting operands)
579 // were made then we are obliged to clear out any optional subclass data like
580 // nsw flags.
581
582 /// NodesToRewrite - Nodes from the original expression available for writing
583 /// the new expression into.
584 SmallVector<BinaryOperator*, 8> NodesToRewrite;
585 unsigned Opcode = I->getOpcode();
587
588 /// NotRewritable - The operands being written will be the leaves of the new
589 /// expression and must not be used as inner nodes (via NodesToRewrite) by
590 /// mistake. Inner nodes are always reassociable, and usually leaves are not
591 /// (if they were they would have been incorporated into the expression and so
592 /// would not be leaves), so most of the time there is no danger of this. But
593 /// in rare cases a leaf may become reassociable if an optimization kills uses
594 /// of it, or it may momentarily become reassociable during rewriting (below)
595 /// due it being removed as an operand of one of its uses. Ensure that misuse
596 /// of leaf nodes as inner nodes cannot occur by remembering all of the future
597 /// leaves and refusing to reuse any of them as inner nodes.
598 SmallPtrSet<Value*, 8> NotRewritable;
599 for (const ValueEntry &Op : Ops)
600 NotRewritable.insert(Op.Op);
601
602 // ExpressionChangedStart - Non-null if the rewritten expression differs from
603 // the original in some non-trivial way, requiring the clearing of optional
604 // flags. Flags are cleared from the operator in ExpressionChangedStart up to
605 // ExpressionChangedEnd inclusive.
606 BinaryOperator *ExpressionChangedStart = nullptr,
607 *ExpressionChangedEnd = nullptr;
608 for (unsigned i = 0; ; ++i) {
609 // The last operation (which comes earliest in the IR) is special as both
610 // operands will come from Ops, rather than just one with the other being
611 // a subexpression.
612 if (i+2 == Ops.size()) {
613 Value *NewLHS = Ops[i].Op;
614 Value *NewRHS = Ops[i+1].Op;
615 Value *OldLHS = Op->getOperand(0);
616 Value *OldRHS = Op->getOperand(1);
617
618 if (NewLHS == OldLHS && NewRHS == OldRHS)
619 // Nothing changed, leave it alone.
620 break;
621
622 if (NewLHS == OldRHS && NewRHS == OldLHS) {
623 // The order of the operands was reversed. Swap them.
624 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
625 Op->swapOperands();
626 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
627 MadeChange = true;
628 ++NumChanged;
629 break;
630 }
631
632 // The new operation differs non-trivially from the original. Overwrite
633 // the old operands with the new ones.
634 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
635 if (NewLHS != OldLHS) {
636 BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
637 if (BO && !NotRewritable.count(BO))
638 NodesToRewrite.push_back(BO);
639 Op->setOperand(0, NewLHS);
640 }
641 if (NewRHS != OldRHS) {
642 BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
643 if (BO && !NotRewritable.count(BO))
644 NodesToRewrite.push_back(BO);
645 Op->setOperand(1, NewRHS);
646 }
647 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
648
649 ExpressionChangedStart = Op;
650 if (!ExpressionChangedEnd)
651 ExpressionChangedEnd = Op;
652 MadeChange = true;
653 ++NumChanged;
654
655 break;
656 }
657
658 // Not the last operation. The left-hand side will be a sub-expression
659 // while the right-hand side will be the current element of Ops.
660 Value *NewRHS = Ops[i].Op;
661 if (NewRHS != Op->getOperand(1)) {
662 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
663 if (NewRHS == Op->getOperand(0)) {
664 // The new right-hand side was already present as the left operand. If
665 // we are lucky then swapping the operands will sort out both of them.
666 Op->swapOperands();
667 } else {
668 // Overwrite with the new right-hand side.
669 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
670 if (BO && !NotRewritable.count(BO))
671 NodesToRewrite.push_back(BO);
672 Op->setOperand(1, NewRHS);
673 ExpressionChangedStart = Op;
674 if (!ExpressionChangedEnd)
675 ExpressionChangedEnd = Op;
676 }
677 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
678 MadeChange = true;
679 ++NumChanged;
680 }
681
682 // Now deal with the left-hand side. If this is already an operation node
683 // from the original expression then just rewrite the rest of the expression
684 // into it.
685 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
686 if (BO && !NotRewritable.count(BO)) {
687 Op = BO;
688 continue;
689 }
690
691 // Otherwise, grab a spare node from the original expression and use that as
692 // the left-hand side. If there are no nodes left then the optimizers made
693 // an expression with more nodes than the original! This usually means that
694 // they did something stupid but it might mean that the problem was just too
695 // hard (finding the mimimal number of multiplications needed to realize a
696 // multiplication expression is NP-complete). Whatever the reason, smart or
697 // stupid, create a new node if there are none left.
698 BinaryOperator *NewOp;
699 if (NodesToRewrite.empty()) {
700 Constant *Poison = PoisonValue::get(I->getType());
702 Poison, "", I->getIterator());
703 if (isa<FPMathOperator>(NewOp))
704 NewOp->setFastMathFlags(I->getFastMathFlags());
705 } else {
706 NewOp = NodesToRewrite.pop_back_val();
707 }
708
709 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
710 Op->setOperand(0, NewOp);
711 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
712 ExpressionChangedStart = Op;
713 if (!ExpressionChangedEnd)
714 ExpressionChangedEnd = Op;
715 MadeChange = true;
716 ++NumChanged;
717 Op = NewOp;
718 }
719
720 // If the expression changed non-trivially then clear out all subclass data
721 // starting from the operator specified in ExpressionChanged, and compactify
722 // the operators to just before the expression root to guarantee that the
723 // expression tree is dominated by all of Ops.
724 if (ExpressionChangedStart) {
725 bool ClearFlags = true;
726 do {
727 // Preserve flags.
728 if (ClearFlags) {
729 if (isa<FPMathOperator>(I)) {
730 FastMathFlags Flags = I->getFastMathFlags();
731 ExpressionChangedStart->clearSubclassOptionalData();
732 ExpressionChangedStart->setFastMathFlags(Flags);
733 } else {
734 ExpressionChangedStart->clearSubclassOptionalData();
735 if (ExpressionChangedStart->getOpcode() == Instruction::Add ||
736 (ExpressionChangedStart->getOpcode() == Instruction::Mul &&
737 Flags.AllKnownNonZero)) {
738 if (Flags.HasNUW)
739 ExpressionChangedStart->setHasNoUnsignedWrap();
740 if (Flags.HasNSW && (Flags.AllKnownNonNegative || Flags.HasNUW))
741 ExpressionChangedStart->setHasNoSignedWrap();
742 }
743 }
744 }
745
746 if (ExpressionChangedStart == ExpressionChangedEnd)
747 ClearFlags = false;
748 if (ExpressionChangedStart == I)
749 break;
750
751 // Discard any debug info related to the expressions that has changed (we
752 // can leave debug info related to the root and any operation that didn't
753 // change, since the result of the expression tree should be the same
754 // even after reassociation).
755 if (ClearFlags)
756 replaceDbgUsesWithUndef(ExpressionChangedStart);
757
758 ExpressionChangedStart->moveBefore(I);
759 ExpressionChangedStart =
760 cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
761 } while (true);
762 }
763
764 // Throw away any left over nodes from the original expression.
765 for (BinaryOperator *BO : NodesToRewrite)
766 RedoInsts.insert(BO);
767}
768
769/// Insert instructions before the instruction pointed to by BI,
770/// that computes the negative version of the value specified. The negative
771/// version of the value is returned, and BI is left pointing at the instruction
772/// that should be processed next by the reassociation pass.
773/// Also add intermediate instructions to the redo list that are modified while
774/// pushing the negates through adds. These will be revisited to see if
775/// additional opportunities have been exposed.
778 if (auto *C = dyn_cast<Constant>(V)) {
779 const DataLayout &DL = BI->getDataLayout();
780 Constant *Res = C->getType()->isFPOrFPVectorTy()
781 ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
783 if (Res)
784 return Res;
785 }
786
787 // We are trying to expose opportunity for reassociation. One of the things
788 // that we want to do to achieve this is to push a negation as deep into an
789 // expression chain as possible, to expose the add instructions. In practice,
790 // this means that we turn this:
791 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
792 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
793 // the constants. We assume that instcombine will clean up the mess later if
794 // we introduce tons of unnecessary negation instructions.
795 //
796 if (BinaryOperator *I =
797 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
798 // Push the negates through the add.
799 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
800 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
801 if (I->getOpcode() == Instruction::Add) {
802 I->setHasNoUnsignedWrap(false);
803 I->setHasNoSignedWrap(false);
804 }
805
806 // We must move the add instruction here, because the neg instructions do
807 // not dominate the old add instruction in general. By moving it, we are
808 // assured that the neg instructions we just inserted dominate the
809 // instruction we are about to insert after them.
810 //
811 I->moveBefore(BI);
812 I->setName(I->getName()+".neg");
813
814 // Add the intermediate negates to the redo list as processing them later
815 // could expose more reassociating opportunities.
816 ToRedo.insert(I);
817 return I;
818 }
819
820 // Okay, we need to materialize a negated version of V with an instruction.
821 // Scan the use lists of V to see if we have one already.
822 for (User *U : V->users()) {
823 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
824 continue;
825
826 // We found one! Now we have to make sure that the definition dominates
827 // this use. We do this by moving it to the entry block (if it is a
828 // non-instruction value) or right after the definition. These negates will
829 // be zapped by reassociate later, so we don't need much finesse here.
830 Instruction *TheNeg = dyn_cast<Instruction>(U);
831
832 // We can't safely propagate a vector zero constant with poison/undef lanes.
833 Constant *C;
834 if (match(TheNeg, m_BinOp(m_Constant(C), m_Value())) &&
835 C->containsUndefOrPoisonElement())
836 continue;
837
838 // Verify that the negate is in this function, V might be a constant expr.
839 if (!TheNeg ||
840 TheNeg->getParent()->getParent() != BI->getParent()->getParent())
841 continue;
842
843 BasicBlock::iterator InsertPt;
844 if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
845 auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
846 if (!InsertPtOpt)
847 continue;
848 InsertPt = *InsertPtOpt;
849 } else {
850 InsertPt = TheNeg->getFunction()
851 ->getEntryBlock()
853 ->getIterator();
854 }
855
856 // Check that if TheNeg is moved out of its parent block, we drop its
857 // debug location to avoid extra coverage.
858 // See test dropping_debugloc_the_neg.ll for a detailed example.
859 if (TheNeg->getParent() != InsertPt->getParent())
860 TheNeg->dropLocation();
861 TheNeg->moveBefore(*InsertPt->getParent(), InsertPt);
862
863 if (TheNeg->getOpcode() == Instruction::Sub) {
864 TheNeg->setHasNoUnsignedWrap(false);
865 TheNeg->setHasNoSignedWrap(false);
866 } else {
867 TheNeg->andIRFlags(BI);
868 }
869 ToRedo.insert(TheNeg);
870 return TheNeg;
871 }
872
873 // Insert a 'neg' instruction that subtracts the value from zero to get the
874 // negation.
875 Instruction *NewNeg =
876 CreateNeg(V, V->getName() + ".neg", BI->getIterator(), BI);
877 ToRedo.insert(NewNeg);
878 return NewNeg;
879}
880
881// See if this `or` looks like an load widening reduction, i.e. that it
882// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
883// ensure that the pattern is *really* a load widening reduction,
884// we do not ensure that it can really be replaced with a widened load,
885// only that it mostly looks like one.
889
890 auto Enqueue = [&](Value *V) {
891 auto *I = dyn_cast<Instruction>(V);
892 // Each node of an `or` reduction must be an instruction,
893 if (!I)
894 return false; // Node is certainly not part of an `or` load reduction.
895 // Only process instructions we have never processed before.
896 if (Visited.insert(I).second)
897 Worklist.emplace_back(I);
898 return true; // Will need to look at parent nodes.
899 };
900
901 if (!Enqueue(Or))
902 return false; // Not an `or` reduction pattern.
903
904 while (!Worklist.empty()) {
905 auto *I = Worklist.pop_back_val();
906
907 // Okay, which instruction is this node?
908 switch (I->getOpcode()) {
909 case Instruction::Or:
910 // Got an `or` node. That's fine, just recurse into it's operands.
911 for (Value *Op : I->operands())
912 if (!Enqueue(Op))
913 return false; // Not an `or` reduction pattern.
914 continue;
915
916 case Instruction::Shl:
917 case Instruction::ZExt:
918 // `shl`/`zext` nodes are fine, just recurse into their base operand.
919 if (!Enqueue(I->getOperand(0)))
920 return false; // Not an `or` reduction pattern.
921 continue;
922
923 case Instruction::Load:
924 // Perfect, `load` node means we've reached an edge of the graph.
925 continue;
926
927 default: // Unknown node.
928 return false; // Not an `or` reduction pattern.
929 }
930 }
931
932 return true;
933}
934
935/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
937 // Don't bother to convert this up unless either the LHS is an associable add
938 // or subtract or mul or if this is only used by one of the above.
939 // This is only a compile-time improvement, it is not needed for correctness!
940 auto isInteresting = [](Value *V) {
941 for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
942 Instruction::Shl})
943 if (isReassociableOp(V, Op))
944 return true;
945 return false;
946 };
947
948 if (any_of(Or->operands(), isInteresting))
949 return true;
950
951 Value *VB = Or->user_back();
952 if (Or->hasOneUse() && isInteresting(VB))
953 return true;
954
955 return false;
956}
957
958/// If we have (X|Y), and iff X and Y have no common bits set,
959/// transform this into (X+Y) to allow arithmetics reassociation.
961 // Convert an or into an add.
962 BinaryOperator *New = CreateAdd(Or->getOperand(0), Or->getOperand(1), "",
963 Or->getIterator(), Or);
964 New->setHasNoSignedWrap();
965 New->setHasNoUnsignedWrap();
966 New->takeName(Or);
967
968 // Everyone now refers to the add instruction.
969 Or->replaceAllUsesWith(New);
970 New->setDebugLoc(Or->getDebugLoc());
971
972 LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
973 return New;
974}
975
976/// Return true if we should break up this subtract of X-Y into (X + -Y).
978 // If this is a negation, we can't split it up!
979 if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
980 return false;
981
982 // Don't breakup X - undef.
983 if (isa<UndefValue>(Sub->getOperand(1)))
984 return false;
985
986 // Don't bother to break this up unless either the LHS is an associable add or
987 // subtract or if this is only used by one.
988 Value *V0 = Sub->getOperand(0);
989 if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
990 isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
991 return true;
992 Value *V1 = Sub->getOperand(1);
993 if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
994 isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
995 return true;
996 Value *VB = Sub->user_back();
997 if (Sub->hasOneUse() &&
998 (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
999 isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
1000 return true;
1001
1002 return false;
1003}
1004
1005/// If we have (X-Y), and if either X is an add, or if this is only used by an
1006/// add, transform this into (X+(0-Y)) to promote better reassociation.
1009 // Convert a subtract into an add and a neg instruction. This allows sub
1010 // instructions to be commuted with other add instructions.
1011 //
1012 // Calculate the negative value of Operand 1 of the sub instruction,
1013 // and set it as the RHS of the add instruction we just made.
1014 Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
1015 BinaryOperator *New =
1016 CreateAdd(Sub->getOperand(0), NegVal, "", Sub->getIterator(), Sub);
1017 Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
1018 Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
1019 New->takeName(Sub);
1020
1021 // Everyone now refers to the add instruction.
1022 Sub->replaceAllUsesWith(New);
1023 New->setDebugLoc(Sub->getDebugLoc());
1024
1025 LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
1026 return New;
1027}
1028
1029/// If this is a shift of a reassociable multiply or is used by one, change
1030/// this into a multiply by a constant to assist with further reassociation.
1032 Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
1033 auto *SA = cast<ConstantInt>(Shl->getOperand(1));
1034 MulCst = ConstantFoldBinaryInstruction(Instruction::Shl, MulCst, SA);
1035 assert(MulCst && "Constant folding of immediate constants failed");
1036
1037 BinaryOperator *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
1038 "", Shl->getIterator());
1039 Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
1040 Mul->takeName(Shl);
1041
1042 // Everyone now refers to the mul instruction.
1043 Shl->replaceAllUsesWith(Mul);
1044 Mul->setDebugLoc(Shl->getDebugLoc());
1045
1046 // We can safely preserve the nuw flag in all cases. It's also safe to turn a
1047 // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1048 // handling. It can be preserved as long as we're not left shifting by
1049 // bitwidth - 1.
1050 bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
1051 bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
1052 unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
1053 if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
1054 Mul->setHasNoSignedWrap(true);
1055 Mul->setHasNoUnsignedWrap(NUW);
1056 return Mul;
1057}
1058
1059/// Scan backwards and forwards among values with the same rank as element i
1060/// to see if X exists. If X does not exist, return i. This is useful when
1061/// scanning for 'x' when we see '-x' because they both get the same rank.
1063 unsigned i, Value *X) {
1064 unsigned XRank = Ops[i].Rank;
1065 unsigned e = Ops.size();
1066 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
1067 if (Ops[j].Op == X)
1068 return j;
1069 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1070 if (Instruction *I2 = dyn_cast<Instruction>(X))
1071 if (I1->isIdenticalTo(I2))
1072 return j;
1073 }
1074 // Scan backwards.
1075 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
1076 if (Ops[j].Op == X)
1077 return j;
1078 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
1079 if (Instruction *I2 = dyn_cast<Instruction>(X))
1080 if (I1->isIdenticalTo(I2))
1081 return j;
1082 }
1083 return i;
1084}
1085
1086/// Emit a tree of add instructions, summing Ops together
1087/// and returning the result. Insert the tree before I.
1090 if (Ops.size() == 1) return Ops.back();
1091
1092 Value *V1 = Ops.pop_back_val();
1093 Value *V2 = EmitAddTreeOfValues(It, Ops);
1094 return CreateAdd(V2, V1, "reass.add", It, &*It);
1095}
1096
1097/// If V is an expression tree that is a multiplication sequence,
1098/// and if this sequence contains a multiply by Factor,
1099/// remove Factor from the tree and return the new tree.
1100Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
1101 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1102 if (!BO)
1103 return nullptr;
1104
1107 MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, Flags);
1109 Factors.reserve(Tree.size());
1110 for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
1111 RepeatedValue E = Tree[i];
1112 Factors.append(E.second, ValueEntry(getRank(E.first), E.first));
1113 }
1114
1115 bool FoundFactor = false;
1116 bool NeedsNegate = false;
1117 for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1118 if (Factors[i].Op == Factor) {
1119 FoundFactor = true;
1120 Factors.erase(Factors.begin()+i);
1121 break;
1122 }
1123
1124 // If this is a negative version of this factor, remove it.
1125 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
1126 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
1127 if (FC1->getValue() == -FC2->getValue()) {
1128 FoundFactor = NeedsNegate = true;
1129 Factors.erase(Factors.begin()+i);
1130 break;
1131 }
1132 } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
1133 if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
1134 const APFloat &F1 = FC1->getValueAPF();
1135 APFloat F2(FC2->getValueAPF());
1136 F2.changeSign();
1137 if (F1 == F2) {
1138 FoundFactor = NeedsNegate = true;
1139 Factors.erase(Factors.begin() + i);
1140 break;
1141 }
1142 }
1143 }
1144 }
1145
1146 if (!FoundFactor) {
1147 // Make sure to restore the operands to the expression tree.
1148 RewriteExprTree(BO, Factors, Flags);
1149 return nullptr;
1150 }
1151
1152 BasicBlock::iterator InsertPt = ++BO->getIterator();
1153
1154 // If this was just a single multiply, remove the multiply and return the only
1155 // remaining operand.
1156 if (Factors.size() == 1) {
1157 RedoInsts.insert(BO);
1158 V = Factors[0].Op;
1159 } else {
1160 RewriteExprTree(BO, Factors, Flags);
1161 V = BO;
1162 }
1163
1164 if (NeedsNegate)
1165 V = CreateNeg(V, "neg", InsertPt, BO);
1166
1167 return V;
1168}
1169
1170/// If V is a single-use multiply, recursively add its operands as factors,
1171/// otherwise add V to the list of factors.
1172///
1173/// Ops is the top-level list of add operands we're trying to factor.
1175 SmallVectorImpl<Value*> &Factors) {
1176 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
1177 if (!BO) {
1178 Factors.push_back(V);
1179 return;
1180 }
1181
1182 // Otherwise, add the LHS and RHS to the list of factors.
1185}
1186
1187/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1188/// This optimizes based on identities. If it can be reduced to a single Value,
1189/// it is returned, otherwise the Ops list is mutated as necessary.
1190static Value *OptimizeAndOrXor(unsigned Opcode,
1192 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1193 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
1194 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1195 // First, check for X and ~X in the operand list.
1196 assert(i < Ops.size());
1197 Value *X;
1198 if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
1199 unsigned FoundX = FindInOperandList(Ops, i, X);
1200 if (FoundX != i) {
1201 if (Opcode == Instruction::And) // ...&X&~X = 0
1202 return Constant::getNullValue(X->getType());
1203
1204 if (Opcode == Instruction::Or) // ...|X|~X = -1
1205 return Constant::getAllOnesValue(X->getType());
1206 }
1207 }
1208
1209 // Next, check for duplicate pairs of values, which we assume are next to
1210 // each other, due to our sorting criteria.
1211 assert(i < Ops.size());
1212 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
1213 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
1214 // Drop duplicate values for And and Or.
1215 Ops.erase(Ops.begin()+i);
1216 --i; --e;
1217 ++NumAnnihil;
1218 continue;
1219 }
1220
1221 // Drop pairs of values for Xor.
1222 assert(Opcode == Instruction::Xor);
1223 if (e == 2)
1224 return Constant::getNullValue(Ops[0].Op->getType());
1225
1226 // Y ^ X^X -> Y
1227 Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
1228 i -= 1; e -= 2;
1229 ++NumAnnihil;
1230 }
1231 }
1232 return nullptr;
1233}
1234
1235/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1236/// instruction with the given two operands, and return the resulting
1237/// instruction. There are two special cases: 1) if the constant operand is 0,
1238/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1239/// be returned.
1241 const APInt &ConstOpnd) {
1242 if (ConstOpnd.isZero())
1243 return nullptr;
1244
1245 if (ConstOpnd.isAllOnes())
1246 return Opnd;
1247
1248 Instruction *I = BinaryOperator::CreateAnd(
1249 Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
1250 InsertBefore);
1251 I->setDebugLoc(InsertBefore->getDebugLoc());
1252 return I;
1253}
1254
1255// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1256// into "R ^ C", where C would be 0, and R is a symbolic value.
1257//
1258// If it was successful, true is returned, and the "R" and "C" is returned
1259// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1260// and both "Res" and "ConstOpnd" remain unchanged.
1261bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1262 APInt &ConstOpnd, Value *&Res) {
1263 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
1264 // = ((x | c1) ^ c1) ^ (c1 ^ c2)
1265 // = (x & ~c1) ^ (c1 ^ c2)
1266 // It is useful only when c1 == c2.
1267 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
1268 return false;
1269
1270 if (!Opnd1->getValue()->hasOneUse())
1271 return false;
1272
1273 const APInt &C1 = Opnd1->getConstPart();
1274 if (C1 != ConstOpnd)
1275 return false;
1276
1277 Value *X = Opnd1->getSymbolicPart();
1278 Res = createAndInstr(It, X, ~C1);
1279 // ConstOpnd was C2, now C1 ^ C2.
1280 ConstOpnd ^= C1;
1281
1282 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1283 RedoInsts.insert(T);
1284 return true;
1285}
1286
1287// Helper function of OptimizeXor(). It tries to simplify
1288// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1289// symbolic value.
1290//
1291// If it was successful, true is returned, and the "R" and "C" is returned
1292// via "Res" and "ConstOpnd", respectively (If the entire expression is
1293// evaluated to a constant, the Res is set to NULL); otherwise, false is
1294// returned, and both "Res" and "ConstOpnd" remain unchanged.
1295bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1296 XorOpnd *Opnd2, APInt &ConstOpnd,
1297 Value *&Res) {
1298 Value *X = Opnd1->getSymbolicPart();
1299 if (X != Opnd2->getSymbolicPart())
1300 return false;
1301
1302 // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1303 int DeadInstNum = 1;
1304 if (Opnd1->getValue()->hasOneUse())
1305 DeadInstNum++;
1306 if (Opnd2->getValue()->hasOneUse())
1307 DeadInstNum++;
1308
1309 // Xor-Rule 2:
1310 // (x | c1) ^ (x & c2)
1311 // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
1312 // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1313 // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1314 //
1315 if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1316 if (Opnd2->isOrExpr())
1317 std::swap(Opnd1, Opnd2);
1318
1319 const APInt &C1 = Opnd1->getConstPart();
1320 const APInt &C2 = Opnd2->getConstPart();
1321 APInt C3((~C1) ^ C2);
1322
1323 // Do not increase code size!
1324 if (!C3.isZero() && !C3.isAllOnes()) {
1325 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1326 if (NewInstNum > DeadInstNum)
1327 return false;
1328 }
1329
1330 Res = createAndInstr(It, X, C3);
1331 ConstOpnd ^= C1;
1332 } else if (Opnd1->isOrExpr()) {
1333 // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1334 //
1335 const APInt &C1 = Opnd1->getConstPart();
1336 const APInt &C2 = Opnd2->getConstPart();
1337 APInt C3 = C1 ^ C2;
1338
1339 // Do not increase code size
1340 if (!C3.isZero() && !C3.isAllOnes()) {
1341 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
1342 if (NewInstNum > DeadInstNum)
1343 return false;
1344 }
1345
1346 Res = createAndInstr(It, X, C3);
1347 ConstOpnd ^= C3;
1348 } else {
1349 // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1350 //
1351 const APInt &C1 = Opnd1->getConstPart();
1352 const APInt &C2 = Opnd2->getConstPart();
1353 APInt C3 = C1 ^ C2;
1354 Res = createAndInstr(It, X, C3);
1355 }
1356
1357 // Put the original operands in the Redo list; hope they will be deleted
1358 // as dead code.
1359 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
1360 RedoInsts.insert(T);
1361 if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
1362 RedoInsts.insert(T);
1363
1364 return true;
1365}
1366
1367/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1368/// to a single Value, it is returned, otherwise the Ops list is mutated as
1369/// necessary.
1370Value *ReassociatePass::OptimizeXor(Instruction *I,
1372 if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
1373 return V;
1374
1375 if (Ops.size() == 1)
1376 return nullptr;
1377
1379 SmallVector<XorOpnd*, 8> OpndPtrs;
1380 Type *Ty = Ops[0].Op->getType();
1381 APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
1382
1383 // Step 1: Convert ValueEntry to XorOpnd
1384 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1385 Value *V = Ops[i].Op;
1386 const APInt *C;
1387 // TODO: Support non-splat vectors.
1388 if (match(V, m_APInt(C))) {
1389 ConstOpnd ^= *C;
1390 } else {
1391 XorOpnd O(V);
1392 O.setSymbolicRank(getRank(O.getSymbolicPart()));
1393 Opnds.push_back(O);
1394 }
1395 }
1396
1397 // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
1398 // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1399 // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1400 // with the previous loop --- the iterator of the "Opnds" may be invalidated
1401 // when new elements are added to the vector.
1402 for (XorOpnd &Op : Opnds)
1403 OpndPtrs.push_back(&Op);
1404
1405 // Step 2: Sort the Xor-Operands in a way such that the operands containing
1406 // the same symbolic value cluster together. For instance, the input operand
1407 // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
1408 // ("x | 123", "x & 789", "y & 456").
1409 //
1410 // The purpose is twofold:
1411 // 1) Cluster together the operands sharing the same symbolic-value.
1412 // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1413 // could potentially shorten crital path, and expose more loop-invariants.
1414 // Note that values' rank are basically defined in RPO order (FIXME).
1415 // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1416 // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
1417 // "z" in the order of X-Y-Z is better than any other orders.
1418 llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
1419 return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1420 });
1421
1422 // Step 3: Combine adjacent operands
1423 XorOpnd *PrevOpnd = nullptr;
1424 bool Changed = false;
1425 for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
1426 XorOpnd *CurrOpnd = OpndPtrs[i];
1427 // The combined value
1428 Value *CV;
1429
1430 // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1431 if (!ConstOpnd.isZero() &&
1432 CombineXorOpnd(I->getIterator(), CurrOpnd, ConstOpnd, CV)) {
1433 Changed = true;
1434 if (CV)
1435 *CurrOpnd = XorOpnd(CV);
1436 else {
1437 CurrOpnd->Invalidate();
1438 continue;
1439 }
1440 }
1441
1442 if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1443 PrevOpnd = CurrOpnd;
1444 continue;
1445 }
1446
1447 // step 3.2: When previous and current operands share the same symbolic
1448 // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1449 if (CombineXorOpnd(I->getIterator(), CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
1450 // Remove previous operand
1451 PrevOpnd->Invalidate();
1452 if (CV) {
1453 *CurrOpnd = XorOpnd(CV);
1454 PrevOpnd = CurrOpnd;
1455 } else {
1456 CurrOpnd->Invalidate();
1457 PrevOpnd = nullptr;
1458 }
1459 Changed = true;
1460 }
1461 }
1462
1463 // Step 4: Reassemble the Ops
1464 if (Changed) {
1465 Ops.clear();
1466 for (const XorOpnd &O : Opnds) {
1467 if (O.isInvalid())
1468 continue;
1469 ValueEntry VE(getRank(O.getValue()), O.getValue());
1470 Ops.push_back(VE);
1471 }
1472 if (!ConstOpnd.isZero()) {
1473 Value *C = ConstantInt::get(Ty, ConstOpnd);
1474 ValueEntry VE(getRank(C), C);
1475 Ops.push_back(VE);
1476 }
1477 unsigned Sz = Ops.size();
1478 if (Sz == 1)
1479 return Ops.back().Op;
1480 if (Sz == 0) {
1481 assert(ConstOpnd.isZero());
1482 return ConstantInt::get(Ty, ConstOpnd);
1483 }
1484 }
1485
1486 return nullptr;
1487}
1488
1489/// Optimize a series of operands to an 'add' instruction. This
1490/// optimizes based on identities. If it can be reduced to a single Value, it
1491/// is returned, otherwise the Ops list is mutated as necessary.
1492Value *ReassociatePass::OptimizeAdd(Instruction *I,
1494 // Scan the operand lists looking for X and -X pairs. If we find any, we
1495 // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1496 // scan for any
1497 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
1498
1499 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1500 Value *TheOp = Ops[i].Op;
1501 // Check to see if we've seen this operand before. If so, we factor all
1502 // instances of the operand together. Due to our sorting criteria, we know
1503 // that these need to be next to each other in the vector.
1504 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
1505 // Rescan the list, remove all instances of this operand from the expr.
1506 unsigned NumFound = 0;
1507 do {
1508 Ops.erase(Ops.begin()+i);
1509 ++NumFound;
1510 } while (i != Ops.size() && Ops[i].Op == TheOp);
1511
1512 LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1513 << '\n');
1514 ++NumFactor;
1515
1516 // Insert a new multiply.
1517 Type *Ty = TheOp->getType();
1518 Constant *C = Ty->isIntOrIntVectorTy() ?
1519 ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
1520 Instruction *Mul = CreateMul(TheOp, C, "factor", I->getIterator(), I);
1521
1522 // Now that we have inserted a multiply, optimize it. This allows us to
1523 // handle cases that require multiple factoring steps, such as this:
1524 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
1525 RedoInsts.insert(Mul);
1526
1527 // If every add operand was a duplicate, return the multiply.
1528 if (Ops.empty())
1529 return Mul;
1530
1531 // Otherwise, we had some input that didn't have the dupe, such as
1532 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
1533 // things being added by this operation.
1534 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
1535
1536 --i;
1537 e = Ops.size();
1538 continue;
1539 }
1540
1541 // Check for X and -X or X and ~X in the operand list.
1542 Value *X;
1543 if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
1544 !match(TheOp, m_FNeg(m_Value(X))))
1545 continue;
1546
1547 unsigned FoundX = FindInOperandList(Ops, i, X);
1548 if (FoundX == i)
1549 continue;
1550
1551 // Remove X and -X from the operand list.
1552 if (Ops.size() == 2 &&
1553 (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
1554 return Constant::getNullValue(X->getType());
1555
1556 // Remove X and ~X from the operand list.
1557 if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
1558 return Constant::getAllOnesValue(X->getType());
1559
1560 Ops.erase(Ops.begin()+i);
1561 if (i < FoundX)
1562 --FoundX;
1563 else
1564 --i; // Need to back up an extra one.
1565 Ops.erase(Ops.begin()+FoundX);
1566 ++NumAnnihil;
1567 --i; // Revisit element.
1568 e -= 2; // Removed two elements.
1569
1570 // if X and ~X we append -1 to the operand list.
1571 if (match(TheOp, m_Not(m_Value()))) {
1572 Value *V = Constant::getAllOnesValue(X->getType());
1573 Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
1574 e += 1;
1575 }
1576 }
1577
1578 // Scan the operand list, checking to see if there are any common factors
1579 // between operands. Consider something like A*A+A*B*C+D. We would like to
1580 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1581 // To efficiently find this, we count the number of times a factor occurs
1582 // for any ADD operands that are MULs.
1583 DenseMap<Value*, unsigned> FactorOccurrences;
1584
1585 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1586 // where they are actually the same multiply.
1587 unsigned MaxOcc = 0;
1588 Value *MaxOccVal = nullptr;
1589 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1590 BinaryOperator *BOp =
1591 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1592 if (!BOp)
1593 continue;
1594
1595 // Compute all of the factors of this added value.
1596 SmallVector<Value*, 8> Factors;
1597 FindSingleUseMultiplyFactors(BOp, Factors);
1598 assert(Factors.size() > 1 && "Bad linearize!");
1599
1600 // Add one to FactorOccurrences for each unique factor in this op.
1601 SmallPtrSet<Value*, 8> Duplicates;
1602 for (Value *Factor : Factors) {
1603 if (!Duplicates.insert(Factor).second)
1604 continue;
1605
1606 unsigned Occ = ++FactorOccurrences[Factor];
1607 if (Occ > MaxOcc) {
1608 MaxOcc = Occ;
1609 MaxOccVal = Factor;
1610 }
1611
1612 // If Factor is a negative constant, add the negated value as a factor
1613 // because we can percolate the negate out. Watch for minint, which
1614 // cannot be positivified.
1615 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
1616 if (CI->isNegative() && !CI->isMinValue(true)) {
1617 Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1618 if (!Duplicates.insert(Factor).second)
1619 continue;
1620 unsigned Occ = ++FactorOccurrences[Factor];
1621 if (Occ > MaxOcc) {
1622 MaxOcc = Occ;
1623 MaxOccVal = Factor;
1624 }
1625 }
1626 } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
1627 if (CF->isNegative()) {
1628 APFloat F(CF->getValueAPF());
1629 F.changeSign();
1630 Factor = ConstantFP::get(CF->getContext(), F);
1631 if (!Duplicates.insert(Factor).second)
1632 continue;
1633 unsigned Occ = ++FactorOccurrences[Factor];
1634 if (Occ > MaxOcc) {
1635 MaxOcc = Occ;
1636 MaxOccVal = Factor;
1637 }
1638 }
1639 }
1640 }
1641 }
1642
1643 // If any factor occurred more than one time, we can pull it out.
1644 if (MaxOcc > 1) {
1645 LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1646 << '\n');
1647 ++NumFactor;
1648
1649 // Create a new instruction that uses the MaxOccVal twice. If we don't do
1650 // this, we could otherwise run into situations where removing a factor
1651 // from an expression will drop a use of maxocc, and this can cause
1652 // RemoveFactorFromExpression on successive values to behave differently.
1653 Instruction *DummyInst =
1654 I->getType()->isIntOrIntVectorTy()
1655 ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
1656 : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
1657
1659 for (unsigned i = 0; i != Ops.size(); ++i) {
1660 // Only try to remove factors from expressions we're allowed to.
1661 BinaryOperator *BOp =
1662 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
1663 if (!BOp)
1664 continue;
1665
1666 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1667 // The factorized operand may occur several times. Convert them all in
1668 // one fell swoop.
1669 for (unsigned j = Ops.size(); j != i;) {
1670 --j;
1671 if (Ops[j].Op == Ops[i].Op) {
1672 NewMulOps.push_back(V);
1673 Ops.erase(Ops.begin()+j);
1674 }
1675 }
1676 --i;
1677 }
1678 }
1679
1680 // No need for extra uses anymore.
1681 DummyInst->deleteValue();
1682
1683 unsigned NumAddedValues = NewMulOps.size();
1684 Value *V = EmitAddTreeOfValues(I->getIterator(), NewMulOps);
1685
1686 // Now that we have inserted the add tree, optimize it. This allows us to
1687 // handle cases that require multiple factoring steps, such as this:
1688 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
1689 assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1690 (void)NumAddedValues;
1691 if (Instruction *VI = dyn_cast<Instruction>(V))
1692 RedoInsts.insert(VI);
1693
1694 // Create the multiply.
1695 Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I->getIterator(), I);
1696
1697 // Rerun associate on the multiply in case the inner expression turned into
1698 // a multiply. We want to make sure that we keep things in canonical form.
1699 RedoInsts.insert(V2);
1700
1701 // If every add operand included the factor (e.g. "A*B + A*C"), then the
1702 // entire result expression is just the multiply "A*(B+C)".
1703 if (Ops.empty())
1704 return V2;
1705
1706 // Otherwise, we had some input that didn't have the factor, such as
1707 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
1708 // things being added by this operation.
1709 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1710 }
1711
1712 return nullptr;
1713}
1714
1715/// Build up a vector of value/power pairs factoring a product.
1716///
1717/// Given a series of multiplication operands, build a vector of factors and
1718/// the powers each is raised to when forming the final product. Sort them in
1719/// the order of descending power.
1720///
1721/// (x*x) -> [(x, 2)]
1722/// ((x*x)*x) -> [(x, 3)]
1723/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1724///
1725/// \returns Whether any factors have a power greater than one.
1727 SmallVectorImpl<Factor> &Factors) {
1728 // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1729 // Compute the sum of powers of simplifiable factors.
1730 unsigned FactorPowerSum = 0;
1731 for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1732 Value *Op = Ops[Idx-1].Op;
1733
1734 // Count the number of occurrences of this value.
1735 unsigned Count = 1;
1736 for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1737 ++Count;
1738 // Track for simplification all factors which occur 2 or more times.
1739 if (Count > 1)
1740 FactorPowerSum += Count;
1741 }
1742
1743 // We can only simplify factors if the sum of the powers of our simplifiable
1744 // factors is 4 or higher. When that is the case, we will *always* have
1745 // a simplification. This is an important invariant to prevent cyclicly
1746 // trying to simplify already minimal formations.
1747 if (FactorPowerSum < 4)
1748 return false;
1749
1750 // Now gather the simplifiable factors, removing them from Ops.
1751 FactorPowerSum = 0;
1752 for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1753 Value *Op = Ops[Idx-1].Op;
1754
1755 // Count the number of occurrences of this value.
1756 unsigned Count = 1;
1757 for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1758 ++Count;
1759 if (Count == 1)
1760 continue;
1761 // Move an even number of occurrences to Factors.
1762 Count &= ~1U;
1763 Idx -= Count;
1764 FactorPowerSum += Count;
1765 Factors.push_back(Factor(Op, Count));
1766 Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1767 }
1768
1769 // None of the adjustments above should have reduced the sum of factor powers
1770 // below our mininum of '4'.
1771 assert(FactorPowerSum >= 4);
1772
1773 llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
1774 return LHS.Power > RHS.Power;
1775 });
1776 return true;
1777}
1778
1779/// Build a tree of multiplies, computing the product of Ops.
1782 if (Ops.size() == 1)
1783 return Ops.back();
1784
1785 Value *LHS = Ops.pop_back_val();
1786 do {
1787 if (LHS->getType()->isIntOrIntVectorTy())
1788 LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1789 else
1790 LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
1791 } while (!Ops.empty());
1792
1793 return LHS;
1794}
1795
1796/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1797///
1798/// Given a vector of values raised to various powers, where no two values are
1799/// equal and the powers are sorted in decreasing order, compute the minimal
1800/// DAG of multiplies to compute the final product, and return that product
1801/// value.
1802Value *
1803ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1804 SmallVectorImpl<Factor> &Factors) {
1805 assert(Factors[0].Power);
1806 SmallVector<Value *, 4> OuterProduct;
1807 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1808 Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1809 if (Factors[Idx].Power != Factors[LastIdx].Power) {
1810 LastIdx = Idx;
1811 continue;
1812 }
1813
1814 // We want to multiply across all the factors with the same power so that
1815 // we can raise them to that power as a single entity. Build a mini tree
1816 // for that.
1817 SmallVector<Value *, 4> InnerProduct;
1818 InnerProduct.push_back(Factors[LastIdx].Base);
1819 do {
1820 InnerProduct.push_back(Factors[Idx].Base);
1821 ++Idx;
1822 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1823
1824 // Reset the base value of the first factor to the new expression tree.
1825 // We'll remove all the factors with the same power in a second pass.
1826 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1827 if (Instruction *MI = dyn_cast<Instruction>(M))
1828 RedoInsts.insert(MI);
1829
1830 LastIdx = Idx;
1831 }
1832 // Unique factors with equal powers -- we've folded them into the first one's
1833 // base.
1834 Factors.erase(llvm::unique(Factors,
1835 [](const Factor &LHS, const Factor &RHS) {
1836 return LHS.Power == RHS.Power;
1837 }),
1838 Factors.end());
1839
1840 // Iteratively collect the base of each factor with an add power into the
1841 // outer product, and halve each power in preparation for squaring the
1842 // expression.
1843 for (Factor &F : Factors) {
1844 if (F.Power & 1)
1845 OuterProduct.push_back(F.Base);
1846 F.Power >>= 1;
1847 }
1848 if (Factors[0].Power) {
1849 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1850 OuterProduct.push_back(SquareRoot);
1851 OuterProduct.push_back(SquareRoot);
1852 }
1853 if (OuterProduct.size() == 1)
1854 return OuterProduct.front();
1855
1856 Value *V = buildMultiplyTree(Builder, OuterProduct);
1857 return V;
1858}
1859
1860Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
1862 // We can only optimize the multiplies when there is a chain of more than
1863 // three, such that a balanced tree might require fewer total multiplies.
1864 if (Ops.size() < 4)
1865 return nullptr;
1866
1867 // Try to turn linear trees of multiplies without other uses of the
1868 // intermediate stages into minimal multiply DAGs with perfect sub-expression
1869 // re-use.
1870 SmallVector<Factor, 4> Factors;
1871 if (!collectMultiplyFactors(Ops, Factors))
1872 return nullptr; // All distinct factors, so nothing left for us to do.
1873
1874 IRBuilder<> Builder(I);
1875 // The reassociate transformation for FP operations is performed only
1876 // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1877 // to the newly generated operations.
1878 if (auto FPI = dyn_cast<FPMathOperator>(I))
1879 Builder.setFastMathFlags(FPI->getFastMathFlags());
1880
1881 Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1882 if (Ops.empty())
1883 return V;
1884
1885 ValueEntry NewEntry = ValueEntry(getRank(V), V);
1886 Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
1887 return nullptr;
1888}
1889
1890Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
1892 // Now that we have the linearized expression tree, try to optimize it.
1893 // Start by folding any constants that we found.
1894 const DataLayout &DL = I->getDataLayout();
1895 Constant *Cst = nullptr;
1896 unsigned Opcode = I->getOpcode();
1897 while (!Ops.empty()) {
1898 if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
1899 if (!Cst) {
1900 Ops.pop_back();
1901 Cst = C;
1902 continue;
1903 }
1904 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
1905 Ops.pop_back();
1906 Cst = Res;
1907 continue;
1908 }
1909 }
1910 break;
1911 }
1912 // If there was nothing but constants then we are done.
1913 if (Ops.empty())
1914 return Cst;
1915
1916 // Put the combined constant back at the end of the operand list, except if
1917 // there is no point. For example, an add of 0 gets dropped here, while a
1918 // multiplication by zero turns the whole expression into zero.
1919 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
1920 if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
1921 return Cst;
1922 Ops.push_back(ValueEntry(0, Cst));
1923 }
1924
1925 if (Ops.size() == 1) return Ops[0].Op;
1926
1927 // Handle destructive annihilation due to identities between elements in the
1928 // argument list here.
1929 unsigned NumOps = Ops.size();
1930 switch (Opcode) {
1931 default: break;
1932 case Instruction::And:
1933 case Instruction::Or:
1934 if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
1935 return Result;
1936 break;
1937
1938 case Instruction::Xor:
1939 if (Value *Result = OptimizeXor(I, Ops))
1940 return Result;
1941 break;
1942
1943 case Instruction::Add:
1944 case Instruction::FAdd:
1945 if (Value *Result = OptimizeAdd(I, Ops))
1946 return Result;
1947 break;
1948
1949 case Instruction::Mul:
1950 case Instruction::FMul:
1951 if (Value *Result = OptimizeMul(I, Ops))
1952 return Result;
1953 break;
1954 }
1955
1956 if (Ops.size() != NumOps)
1957 return OptimizeExpression(I, Ops);
1958 return nullptr;
1959}
1960
1961// Remove dead instructions and if any operands are trivially dead add them to
1962// Insts so they will be removed as well.
1963void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
1964 OrderedSet &Insts) {
1965 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
1966 SmallVector<Value *, 4> Ops(I->operands());
1967 ValueRankMap.erase(I);
1968 Insts.remove(I);
1969 RedoInsts.remove(I);
1971 I->eraseFromParent();
1972 for (auto *Op : Ops)
1973 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1974 if (OpInst->use_empty())
1975 Insts.insert(OpInst);
1976}
1977
1978/// Zap the given instruction, adding interesting operands to the work list.
1979void ReassociatePass::EraseInst(Instruction *I) {
1980 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
1981 LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
1982
1983 SmallVector<Value *, 8> Ops(I->operands());
1984 // Erase the dead instruction.
1985 ValueRankMap.erase(I);
1986 RedoInsts.remove(I);
1988 I->eraseFromParent();
1989 // Optimize its operands.
1990 SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
1991 for (Value *V : Ops)
1992 if (Instruction *Op = dyn_cast<Instruction>(V)) {
1993 // If this is a node in an expression tree, climb to the expression root
1994 // and add that since that's where optimization actually happens.
1995 unsigned Opcode = Op->getOpcode();
1996 while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
1997 Visited.insert(Op).second)
1998 Op = Op->user_back();
1999
2000 // The instruction we're going to push may be coming from a
2001 // dead block, and Reassociate skips the processing of unreachable
2002 // blocks because it's a waste of time and also because it can
2003 // lead to infinite loop due to LLVM's non-standard definition
2004 // of dominance.
2005 if (ValueRankMap.contains(Op))
2006 RedoInsts.insert(Op);
2007 }
2008
2009 MadeChange = true;
2010}
2011
2012/// Recursively analyze an expression to build a list of instructions that have
2013/// negative floating-point constant operands. The caller can then transform
2014/// the list to create positive constants for better reassociation and CSE.
2016 SmallVectorImpl<Instruction *> &Candidates) {
2017 // Handle only one-use instructions. Combining negations does not justify
2018 // replicating instructions.
2019 Instruction *I;
2020 if (!match(V, m_OneUse(m_Instruction(I))))
2021 return;
2022
2023 // Handle expressions of multiplications and divisions.
2024 // TODO: This could look through floating-point casts.
2025 const APFloat *C;
2026 switch (I->getOpcode()) {
2027 case Instruction::FMul:
2028 // Not expecting non-canonical code here. Bail out and wait.
2029 if (match(I->getOperand(0), m_Constant()))
2030 break;
2031
2032 if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
2033 Candidates.push_back(I);
2034 LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
2035 }
2036 getNegatibleInsts(I->getOperand(0), Candidates);
2037 getNegatibleInsts(I->getOperand(1), Candidates);
2038 break;
2039 case Instruction::FDiv:
2040 // Not expecting non-canonical code here. Bail out and wait.
2041 if (match(I->getOperand(0), m_Constant()) &&
2042 match(I->getOperand(1), m_Constant()))
2043 break;
2044
2045 if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
2046 (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
2047 Candidates.push_back(I);
2048 LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
2049 }
2050 getNegatibleInsts(I->getOperand(0), Candidates);
2051 getNegatibleInsts(I->getOperand(1), Candidates);
2052 break;
2053 default:
2054 break;
2055 }
2056}
2057
2058/// Given an fadd/fsub with an operand that is a one-use instruction
2059/// (the fadd/fsub), try to change negative floating-point constants into
2060/// positive constants to increase potential for reassociation and CSE.
2061Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
2062 Instruction *Op,
2063 Value *OtherOp) {
2064 assert((I->getOpcode() == Instruction::FAdd ||
2065 I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2066
2067 // Collect instructions with negative FP constants from the subtree that ends
2068 // in Op.
2070 getNegatibleInsts(Op, Candidates);
2071 if (Candidates.empty())
2072 return nullptr;
2073
2074 // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
2075 // resulting subtract will be broken up later. This can get us into an
2076 // infinite loop during reassociation.
2077 bool IsFSub = I->getOpcode() == Instruction::FSub;
2078 bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
2079 if (NeedsSubtract && ShouldBreakUpSubtract(I))
2080 return nullptr;
2081
2082 for (Instruction *Negatible : Candidates) {
2083 const APFloat *C;
2084 if (match(Negatible->getOperand(0), m_APFloat(C))) {
2085 assert(!match(Negatible->getOperand(1), m_Constant()) &&
2086 "Expecting only 1 constant operand");
2087 assert(C->isNegative() && "Expected negative FP constant");
2088 Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
2089 MadeChange = true;
2090 }
2091 if (match(Negatible->getOperand(1), m_APFloat(C))) {
2092 assert(!match(Negatible->getOperand(0), m_Constant()) &&
2093 "Expecting only 1 constant operand");
2094 assert(C->isNegative() && "Expected negative FP constant");
2095 Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
2096 MadeChange = true;
2097 }
2098 }
2099 assert(MadeChange == true && "Negative constant candidate was not changed");
2100
2101 // Negations cancelled out.
2102 if (Candidates.size() % 2 == 0)
2103 return I;
2104
2105 // Negate the final operand in the expression by flipping the opcode of this
2106 // fadd/fsub.
2107 assert(Candidates.size() % 2 == 1 && "Expected odd number");
2108 IRBuilder<> Builder(I);
2109 Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
2110 : Builder.CreateFSubFMF(OtherOp, Op, I);
2111 I->replaceAllUsesWith(NewInst);
2112 RedoInsts.insert(I);
2113 return dyn_cast<Instruction>(NewInst);
2114}
2115
2116/// Canonicalize expressions that contain a negative floating-point constant
2117/// of the following form:
2118/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2119/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2120/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2121///
2122/// The fadd/fsub opcode may be switched to allow folding a negation into the
2123/// input instruction.
2124Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
2125 LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
2126 Value *X;
2127 Instruction *Op;
2129 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2130 I = R;
2132 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2133 I = R;
2135 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
2136 I = R;
2137 return I;
2138}
2139
2140/// Inspect and optimize the given instruction. Note that erasing
2141/// instructions is not allowed.
2142void ReassociatePass::OptimizeInst(Instruction *I) {
2143 // Only consider operations that we understand.
2144 if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
2145 return;
2146
2147 if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
2148 // If an operand of this shift is a reassociable multiply, or if the shift
2149 // is used by a reassociable multiply or add, turn into a multiply.
2150 if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
2151 (I->hasOneUse() &&
2152 (isReassociableOp(I->user_back(), Instruction::Mul) ||
2153 isReassociableOp(I->user_back(), Instruction::Add)))) {
2155 RedoInsts.insert(I);
2156 MadeChange = true;
2157 I = NI;
2158 }
2159
2160 // Commute binary operators, to canonicalize the order of their operands.
2161 // This can potentially expose more CSE opportunities, and makes writing other
2162 // transformations simpler.
2163 if (I->isCommutative())
2164 canonicalizeOperands(I);
2165
2166 // Canonicalize negative constants out of expressions.
2167 if (Instruction *Res = canonicalizeNegFPConstants(I))
2168 I = Res;
2169
2170 // Don't optimize floating-point instructions unless they have the
2171 // appropriate FastMathFlags for reassociation enabled.
2172 if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
2173 return;
2174
2175 // Do not reassociate boolean (i1) expressions. We want to preserve the
2176 // original order of evaluation for short-circuited comparisons that
2177 // SimplifyCFG has folded to AND/OR expressions. If the expression
2178 // is not further optimized, it is likely to be transformed back to a
2179 // short-circuited form for code gen, and the source order may have been
2180 // optimized for the most likely conditions.
2181 if (I->getType()->isIntegerTy(1))
2182 return;
2183
2184 // If this is a bitwise or instruction of operands
2185 // with no common bits set, convert it to X+Y.
2186 if (I->getOpcode() == Instruction::Or &&
2188 (cast<PossiblyDisjointInst>(I)->isDisjoint() ||
2189 haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
2190 SimplifyQuery(I->getDataLayout(),
2191 /*DT=*/nullptr, /*AC=*/nullptr, I)))) {
2193 RedoInsts.insert(I);
2194 MadeChange = true;
2195 I = NI;
2196 }
2197
2198 // If this is a subtract instruction which is not already in negate form,
2199 // see if we can convert it to X+-Y.
2200 if (I->getOpcode() == Instruction::Sub) {
2201 if (ShouldBreakUpSubtract(I)) {
2202 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2203 RedoInsts.insert(I);
2204 MadeChange = true;
2205 I = NI;
2206 } else if (match(I, m_Neg(m_Value()))) {
2207 // Otherwise, this is a negation. See if the operand is a multiply tree
2208 // and if this is not an inner node of a multiply tree.
2209 if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
2210 (!I->hasOneUse() ||
2211 !isReassociableOp(I->user_back(), Instruction::Mul))) {
2213 // If the negate was simplified, revisit the users to see if we can
2214 // reassociate further.
2215 for (User *U : NI->users()) {
2216 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2217 RedoInsts.insert(Tmp);
2218 }
2219 RedoInsts.insert(I);
2220 MadeChange = true;
2221 I = NI;
2222 }
2223 }
2224 } else if (I->getOpcode() == Instruction::FNeg ||
2225 I->getOpcode() == Instruction::FSub) {
2226 if (ShouldBreakUpSubtract(I)) {
2227 Instruction *NI = BreakUpSubtract(I, RedoInsts);
2228 RedoInsts.insert(I);
2229 MadeChange = true;
2230 I = NI;
2231 } else if (match(I, m_FNeg(m_Value()))) {
2232 // Otherwise, this is a negation. See if the operand is a multiply tree
2233 // and if this is not an inner node of a multiply tree.
2234 Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
2235 I->getOperand(0);
2236 if (isReassociableOp(Op, Instruction::FMul) &&
2237 (!I->hasOneUse() ||
2238 !isReassociableOp(I->user_back(), Instruction::FMul))) {
2239 // If the negate was simplified, revisit the users to see if we can
2240 // reassociate further.
2242 for (User *U : NI->users()) {
2243 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
2244 RedoInsts.insert(Tmp);
2245 }
2246 RedoInsts.insert(I);
2247 MadeChange = true;
2248 I = NI;
2249 }
2250 }
2251 }
2252
2253 // If this instruction is an associative binary operator, process it.
2254 if (!I->isAssociative()) return;
2255 BinaryOperator *BO = cast<BinaryOperator>(I);
2256
2257 // If this is an interior node of a reassociable tree, ignore it until we
2258 // get to the root of the tree, to avoid N^2 analysis.
2259 unsigned Opcode = BO->getOpcode();
2260 if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2261 // During the initial run we will get to the root of the tree.
2262 // But if we get here while we are redoing instructions, there is no
2263 // guarantee that the root will be visited. So Redo later
2264 if (BO->user_back() != BO &&
2265 BO->getParent() == BO->user_back()->getParent())
2266 RedoInsts.insert(BO->user_back());
2267 return;
2268 }
2269
2270 // If this is an add tree that is used by a sub instruction, ignore it
2271 // until we process the subtract.
2272 if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2273 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
2274 return;
2275 if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2276 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
2277 return;
2278
2279 ReassociateExpression(BO);
2280}
2281
2282void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2283 // First, walk the expression tree, linearizing the tree, collecting the
2284 // operand information.
2287 MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, Flags);
2289 Ops.reserve(Tree.size());
2290 for (const RepeatedValue &E : Tree)
2291 Ops.append(E.second, ValueEntry(getRank(E.first), E.first));
2292
2293 LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
2294
2295 // Now that we have linearized the tree to a list and have gathered all of
2296 // the operands and their ranks, sort the operands by their rank. Use a
2297 // stable_sort so that values with equal ranks will have their relative
2298 // positions maintained (and so the compiler is deterministic). Note that
2299 // this sorts so that the highest ranking values end up at the beginning of
2300 // the vector.
2301 llvm::stable_sort(Ops);
2302
2303 // Now that we have the expression tree in a convenient
2304 // sorted form, optimize it globally if possible.
2305 if (Value *V = OptimizeExpression(I, Ops)) {
2306 if (V == I)
2307 // Self-referential expression in unreachable code.
2308 return;
2309 // This expression tree simplified to something that isn't a tree,
2310 // eliminate it.
2311 LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
2312 I->replaceAllUsesWith(V);
2313 if (Instruction *VI = dyn_cast<Instruction>(V))
2314 if (I->getDebugLoc())
2315 VI->setDebugLoc(I->getDebugLoc());
2316 RedoInsts.insert(I);
2317 ++NumAnnihil;
2318 return;
2319 }
2320
2321 // We want to sink immediates as deeply as possible except in the case where
2322 // this is a multiply tree used only by an add, and the immediate is a -1.
2323 // In this case we reassociate to put the negation on the outside so that we
2324 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
2325 if (I->hasOneUse()) {
2326 if (I->getOpcode() == Instruction::Mul &&
2327 cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
2328 isa<ConstantInt>(Ops.back().Op) &&
2329 cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
2330 ValueEntry Tmp = Ops.pop_back_val();
2331 Ops.insert(Ops.begin(), Tmp);
2332 } else if (I->getOpcode() == Instruction::FMul &&
2333 cast<Instruction>(I->user_back())->getOpcode() ==
2334 Instruction::FAdd &&
2335 isa<ConstantFP>(Ops.back().Op) &&
2336 cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
2337 ValueEntry Tmp = Ops.pop_back_val();
2338 Ops.insert(Ops.begin(), Tmp);
2339 }
2340 }
2341
2342 LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
2343
2344 if (Ops.size() == 1) {
2345 if (Ops[0].Op == I)
2346 // Self-referential expression in unreachable code.
2347 return;
2348
2349 // This expression tree simplified to something that isn't a tree,
2350 // eliminate it.
2351 I->replaceAllUsesWith(Ops[0].Op);
2352 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
2353 OI->setDebugLoc(I->getDebugLoc());
2354 RedoInsts.insert(I);
2355 return;
2356 }
2357
2358 if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2359 // Find the pair with the highest count in the pairmap and move it to the
2360 // back of the list so that it can later be CSE'd.
2361 // example:
2362 // a*b*c*d*e
2363 // if c*e is the most "popular" pair, we can express this as
2364 // (((c*e)*d)*b)*a
2365 unsigned Max = 1;
2366 unsigned BestRank = 0;
2367 std::pair<unsigned, unsigned> BestPair;
2368 unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2369 unsigned LimitIdx = 0;
2370 // With the CSE-driven heuristic, we are about to slap two values at the
2371 // beginning of the expression whereas they could live very late in the CFG.
2372 // When using the CSE-local heuristic we avoid creating dependences from
2373 // completely unrelated part of the CFG by limiting the expression
2374 // reordering on the values that live in the first seen basic block.
2375 // The main idea is that we want to avoid forming expressions that would
2376 // become loop dependent.
2377 if (UseCSELocalOpt) {
2378 const BasicBlock *FirstSeenBB = nullptr;
2379 int StartIdx = Ops.size() - 1;
2380 // Skip the first value of the expression since we need at least two
2381 // values to materialize an expression. I.e., even if this value is
2382 // anchored in a different basic block, the actual first sub expression
2383 // will be anchored on the second value.
2384 for (int i = StartIdx - 1; i != -1; --i) {
2385 const Value *Val = Ops[i].Op;
2386 const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
2387 const BasicBlock *SeenBB = nullptr;
2388 if (!CurrLeafInstr) {
2389 // The value is free of any CFG dependencies.
2390 // Do as if it lives in the entry block.
2391 //
2392 // We do this to make sure all the values falling on this path are
2393 // seen through the same anchor point. The rationale is these values
2394 // can be combined together to from a sub expression free of any CFG
2395 // dependencies so we want them to stay together.
2396 // We could be cleverer and postpone the anchor down to the first
2397 // anchored value, but that's likely complicated to get right.
2398 // E.g., we wouldn't want to do that if that means being stuck in a
2399 // loop.
2400 //
2401 // For instance, we wouldn't want to change:
2402 // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
2403 // into
2404 // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
2405 // Because all the sub expressions with arg2..N would be stuck between
2406 // two loop dependent values.
2407 SeenBB = &I->getParent()->getParent()->getEntryBlock();
2408 } else {
2409 SeenBB = CurrLeafInstr->getParent();
2410 }
2411
2412 if (!FirstSeenBB) {
2413 FirstSeenBB = SeenBB;
2414 continue;
2415 }
2416 if (FirstSeenBB != SeenBB) {
2417 // ith value is in a different basic block.
2418 // Rewind the index once to point to the last value on the same basic
2419 // block.
2420 LimitIdx = i + 1;
2421 LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
2422 << LimitIdx << ", " << StartIdx << "]\n");
2423 break;
2424 }
2425 }
2426 }
2427 for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
2428 // We must use int type to go below zero when LimitIdx is 0.
2429 for (int j = i - 1; j >= (int)LimitIdx; --j) {
2430 unsigned Score = 0;
2431 Value *Op0 = Ops[i].Op;
2432 Value *Op1 = Ops[j].Op;
2433 if (std::less<Value *>()(Op1, Op0))
2434 std::swap(Op0, Op1);
2435 auto it = PairMap[Idx].find({Op0, Op1});
2436 if (it != PairMap[Idx].end()) {
2437 // Functions like BreakUpSubtract() can erase the Values we're using
2438 // as keys and create new Values after we built the PairMap. There's a
2439 // small chance that the new nodes can have the same address as
2440 // something already in the table. We shouldn't accumulate the stored
2441 // score in that case as it refers to the wrong Value.
2442 if (it->second.isValid())
2443 Score += it->second.Score;
2444 }
2445
2446 unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
2447
2448 // By construction, the operands are sorted in reverse order of their
2449 // topological order.
2450 // So we tend to form (sub) expressions with values that are close to
2451 // each other.
2452 //
2453 // Now to expose more CSE opportunities we want to expose the pair of
2454 // operands that occur the most (as statically computed in
2455 // BuildPairMap.) as the first sub-expression.
2456 //
2457 // If two pairs occur as many times, we pick the one with the
2458 // lowest rank, meaning the one with both operands appearing first in
2459 // the topological order.
2460 if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2461 BestPair = {j, i};
2462 Max = Score;
2463 BestRank = MaxRank;
2464 }
2465 }
2466 }
2467 if (Max > 1) {
2468 auto Op0 = Ops[BestPair.first];
2469 auto Op1 = Ops[BestPair.second];
2470 Ops.erase(&Ops[BestPair.second]);
2471 Ops.erase(&Ops[BestPair.first]);
2472 Ops.push_back(Op0);
2473 Ops.push_back(Op1);
2474 }
2475 }
2476 LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
2477 dbgs() << '\n');
2478 // Now that we ordered and optimized the expressions, splat them back into
2479 // the expression tree, removing any unneeded nodes.
2480 RewriteExprTree(I, Ops, Flags);
2481}
2482
2483void
2484ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2485 // Make a "pairmap" of how often each operand pair occurs.
2486 for (BasicBlock *BI : RPOT) {
2487 for (Instruction &I : *BI) {
2488 if (!I.isAssociative() || !I.isBinaryOp())
2489 continue;
2490
2491 // Ignore nodes that aren't at the root of trees.
2492 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2493 continue;
2494
2495 // Collect all operands in a single reassociable expression.
2496 // Since Reassociate has already been run once, we can assume things
2497 // are already canonical according to Reassociation's regime.
2498 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
2500 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2501 Value *Op = Worklist.pop_back_val();
2502 Instruction *OpI = dyn_cast<Instruction>(Op);
2503 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
2504 Ops.push_back(Op);
2505 continue;
2506 }
2507 // Be paranoid about self-referencing expressions in unreachable code.
2508 if (OpI->getOperand(0) != OpI)
2509 Worklist.push_back(OpI->getOperand(0));
2510 if (OpI->getOperand(1) != OpI)
2511 Worklist.push_back(OpI->getOperand(1));
2512 }
2513 // Skip extremely long expressions.
2514 if (Ops.size() > GlobalReassociateLimit)
2515 continue;
2516
2517 // Add all pairwise combinations of operands to the pair map.
2518 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2520 for (unsigned i = 0; i < Ops.size() - 1; ++i) {
2521 for (unsigned j = i + 1; j < Ops.size(); ++j) {
2522 // Canonicalize operand orderings.
2523 Value *Op0 = Ops[i];
2524 Value *Op1 = Ops[j];
2525 if (std::less<Value *>()(Op1, Op0))
2526 std::swap(Op0, Op1);
2527 if (!Visited.insert({Op0, Op1}).second)
2528 continue;
2529 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
2530 if (!res.second) {
2531 // If either key value has been erased then we've got the same
2532 // address by coincidence. That can't happen here because nothing is
2533 // erasing values but it can happen by the time we're querying the
2534 // map.
2535 assert(res.first->second.isValid() && "WeakVH invalidated");
2536 ++res.first->second.Score;
2537 }
2538 }
2539 }
2540 }
2541 }
2542}
2543
2545 // Get the functions basic blocks in Reverse Post Order. This order is used by
2546 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2547 // blocks (it has been seen that the analysis in this pass could hang when
2548 // analysing dead basic blocks).
2550
2551 // Calculate the rank map for F.
2552 BuildRankMap(F, RPOT);
2553
2554 // Build the pair map before running reassociate.
2555 // Technically this would be more accurate if we did it after one round
2556 // of reassociation, but in practice it doesn't seem to help much on
2557 // real-world code, so don't waste the compile time running reassociate
2558 // twice.
2559 // If a user wants, they could expicitly run reassociate twice in their
2560 // pass pipeline for further potential gains.
2561 // It might also be possible to update the pair map during runtime, but the
2562 // overhead of that may be large if there's many reassociable chains.
2563 BuildPairMap(RPOT);
2564
2565 MadeChange = false;
2566
2567 // Traverse the same blocks that were analysed by BuildRankMap.
2568 for (BasicBlock *BI : RPOT) {
2569 assert(RankMap.count(&*BI) && "BB should be ranked.");
2570 // Optimize every instruction in the basic block.
2571 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2573 EraseInst(&*II++);
2574 } else {
2575 OptimizeInst(&*II);
2576 assert(II->getParent() == &*BI && "Moved to a different block!");
2577 ++II;
2578 }
2579
2580 // Make a copy of all the instructions to be redone so we can remove dead
2581 // instructions.
2582 OrderedSet ToRedo(RedoInsts);
2583 // Iterate over all instructions to be reevaluated and remove trivially dead
2584 // instructions. If any operand of the trivially dead instruction becomes
2585 // dead mark it for deletion as well. Continue this process until all
2586 // trivially dead instructions have been removed.
2587 while (!ToRedo.empty()) {
2588 Instruction *I = ToRedo.pop_back_val();
2590 RecursivelyEraseDeadInsts(I, ToRedo);
2591 MadeChange = true;
2592 }
2593 }
2594
2595 // Now that we have removed dead instructions, we can reoptimize the
2596 // remaining instructions.
2597 while (!RedoInsts.empty()) {
2598 Instruction *I = RedoInsts.front();
2599 RedoInsts.erase(RedoInsts.begin());
2601 EraseInst(I);
2602 else
2603 OptimizeInst(I);
2604 }
2605 }
2606
2607 // We are done with the rank map and pair map.
2608 RankMap.clear();
2609 ValueRankMap.clear();
2610 for (auto &Entry : PairMap)
2611 Entry.clear();
2612
2613 if (MadeChange) {
2616 return PA;
2617 }
2618
2619 return PreservedAnalyses::all();
2620}
2621
2622namespace {
2623
2624 class ReassociateLegacyPass : public FunctionPass {
2625 ReassociatePass Impl;
2626
2627 public:
2628 static char ID; // Pass identification, replacement for typeid
2629
2630 ReassociateLegacyPass() : FunctionPass(ID) {
2632 }
2633
2634 bool runOnFunction(Function &F) override {
2635 if (skipFunction(F))
2636 return false;
2637
2638 FunctionAnalysisManager DummyFAM;
2639 auto PA = Impl.run(F, DummyFAM);
2640 return !PA.areAllPreserved();
2641 }
2642
2643 void getAnalysisUsage(AnalysisUsage &AU) const override {
2644 AU.setPreservesCFG();
2648 }
2649 };
2650
2651} // end anonymous namespace
2652
2653char ReassociateLegacyPass::ID = 0;
2654
2655INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2656 "Reassociate expressions", false, false)
2657
2658// Public interface to the Reassociate pass
2660 return new ReassociateLegacyPass();
2661}
@ Poison
static const LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, ScalarEvolution *SE, LoopInfo *LI)
isInteresting - Test whether the given expression is "interesting" when used by the given expression,...
Definition: IVUsers.cpp:56
static bool isReassociableOp(Instruction *I, unsigned IntOpcode, unsigned FPOpcode)
Definition: LICM.cpp:2676
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
nary reassociate
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static void PrintOps(Instruction *I, const SmallVectorImpl< ValueEntry > &Ops)
Print out the expression identified in the Ops list.
Definition: Reassociate.cpp:82
static Value * EmitAddTreeOfValues(BasicBlock::iterator It, SmallVectorImpl< WeakTrackingVH > &Ops)
Emit a tree of add instructions, summing Ops together and returning the result.
static bool ShouldBreakUpSubtract(Instruction *Sub)
Return true if we should break up this subtract of X-Y into (X + -Y).
static Value * buildMultiplyTree(IRBuilderBase &Builder, SmallVectorImpl< Value * > &Ops)
Build a tree of multiplies, computing the product of Ops.
static void getNegatibleInsts(Value *V, SmallVectorImpl< Instruction * > &Candidates)
Recursively analyze an expression to build a list of instructions that have negative floating-point c...
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static BinaryOperator * BreakUpSubtract(Instruction *Sub, ReassociatePass::OrderedSet &ToRedo)
If we have (X-Y), and if either X is an add, or if this is only used by an add, transform this into (...
static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl< Value * > &Factors)
If V is a single-use multiply, recursively add its operands as factors, otherwise add V to the list o...
std::pair< Value *, uint64_t > RepeatedValue
static Value * OptimizeAndOrXor(unsigned Opcode, SmallVectorImpl< ValueEntry > &Ops)
Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
static BinaryOperator * convertOrWithNoCommonBitsToAdd(Instruction *Or)
If we have (X|Y), and iff X and Y have no common bits set, transform this into (X+Y) to allow arithme...
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static bool LinearizeExprTree(Instruction *I, SmallVectorImpl< RepeatedValue > &Ops, ReassociatePass::OrderedSet &ToRedo, reassociate::OverflowTracking &Flags)
Given an associative binary expression, return the leaf nodes in Ops along with their weights (how ma...
static bool collectMultiplyFactors(SmallVectorImpl< ValueEntry > &Ops, SmallVectorImpl< Factor > &Factors)
Build up a vector of value/power pairs factoring a product.
static BinaryOperator * ConvertShiftToMul(Instruction *Shl)
If this is a shift of a reassociable multiply or is used by one, change this into a multiply by a con...
static cl::opt< bool > UseCSELocalOpt(DEBUG_TYPE "-use-cse-local", cl::desc("Only reorder expressions within a basic block " "when exposing CSE opportunities"), cl::init(true), cl::Hidden)
static unsigned FindInOperandList(const SmallVectorImpl< ValueEntry > &Ops, unsigned i, Value *X)
Scan backwards and forwards among values with the same rank as element i to see if X exists.
static BinaryOperator * LowerNegateToMultiply(Instruction *Neg)
Replace 0-X with X*-1.
static Instruction * CreateNeg(Value *S1, const Twine &Name, BasicBlock::iterator InsertBefore, Value *FlagsOp)
static bool hasFPAssociativeFlags(Instruction *I)
Return true if I is an instruction with the FastMathFlags that are needed for general reassociation s...
static Value * createAndInstr(BasicBlock::iterator InsertBefore, Value *Opnd, const APInt &ConstOpnd)
Helper function of CombineXorOpnd().
static Value * NegateValue(Value *V, Instruction *BI, ReassociatePass::OrderedSet &ToRedo)
Insert instructions before the instruction pointed to by BI, that computes the negative version of th...
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or)
Return true if it may be profitable to convert this (X|Y) into (X+Y).
#define DEBUG_TYPE
Definition: Reassociate.cpp:68
static bool isLoadCombineCandidate(Instruction *Or)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:451
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Represent the analysis usage information of a pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:209
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:384
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:167
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition: InstrTypes.h:442
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
static Constant * getBinOpAbsorber(unsigned Opcode, Type *Ty)
Return the absorbing element for the given binary operation, i.e.
Definition: Constants.cpp:2724
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2653
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2587
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const BasicBlock & getEntryBlock() const
Definition: Function.h:800
Legacy wrapper pass to provide the GlobalsAAResult object.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1552
Value * CreateFSubFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1579
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1592
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1366
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2671
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void dropLocation()
Drop the instruction's debug location.
Definition: DebugInfo.cpp:967
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
const char * getOpcodeName() const
Definition: Instruction.h:276
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:463
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
bool areAllPreserved() const
Test whether all analyses are preserved (and none are abandoned).
Definition: Analysis.h:281
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
Reassociate commutative expressions.
Definition: Reassociate.h:85
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition: InstrTypes.h:164
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:544
void deleteValue()
Delete a pointer to a generic Value.
Definition: Value.cpp:110
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
Utility class representing a non-constant Xor-operand.
Value * getSymbolicPart() const
unsigned getSymbolicRank() const
void setSymbolicRank(unsigned R)
const APInt & getConstPart() const
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:816
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:316
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool haveNoCommonBitsSet(const WithCache< const Value * > &LHSCache, const WithCache< const Value * > &RHSCache, const SimplifyQuery &SQ)
Return true if LHS and RHS have no common bits set.
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1671
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1440
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2013
FunctionPass * createReassociatePass()
void initializeReassociateLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in @llvm.dbg intrinsics with undef.
Definition: Local.cpp:617
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1954
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Utility class representing a base and exponent pair which form one factor of some product.
Definition: Reassociate.h:60