//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates n-ary add expressions and eliminates the redundancy
// exposed by the reassociation.
//
// A motivating example:
//
//   void foo(int a, int b) {
//     bar(a + b);
//     bar((a + 2) + b);
//   }
//
// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
// the above code to
//
//   int t = a + b;
//   bar(t);
//   bar(t + 2);
//
// However, the Reassociate pass is unable to do that because it processes each
// instruction individually and believes (a + 2) + b is the best form according
// to its rank system.
//
// To address this limitation, NaryReassociate reassociates an expression into
// a form that reuses existing instructions. As a result, NaryReassociate can
// rewrite (a + 2) + b in the example to (a + b) + 2 because it detects that
// (a + b) has already been computed.
//
// NaryReassociate works as follows. For every instruction in the form of
// (a + b) + c, it checks whether a + c or b + c is already computed by a
// dominating instruction. If so, it reassociates (a + b) + c into (a + c) + b
// or (b + c) + a and removes the redundancy accordingly. To make this lookup
// efficient, we store each instruction seen and its SCEV in an
// SCEV-to-instruction map.
//
// Although the algorithm pattern-matches only ternary additions, it
// automatically handles many >3-ary expressions by walking the function in
// depth-first order. For example, given
//
//   (a + c) + d
//   ((a + b) + c) + d
//
// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
// ((a + c) + b) + d into ((a + c) + d) + b.
//
// Finally, the above dominator-based algorithm may need to be run for multiple
// iterations before emitting optimal code. One source of this need is that we
// only split an operand when it has a single use. The algorithm can eliminate
// an instruction and decrease the use count of its operands. As a result, an
// instruction that previously had multiple uses may become a single-use
// instruction and thus eligible for splitting. For example,
//
//   ac = a + c
//   ab = a + b
//   abc = ab + c
//   ab2 = ab + b
//   ab2c = ab2 + c
//
// In the first iteration, we cannot reassociate abc to ac + b because ab is
// used twice. However, we can reassociate ab2c to abc + b in the first
// iteration. As a result, ab2 becomes dead and ab is used only once in the
// second iteration.
//
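// To exercise this pass in isolation, it can typically be run through opt with
// the new pass manager, e.g.:
//
//   opt -passes=nary-reassociate -S input.ll
//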
// Limitations and TODO items:
//
// 1) We only consider n-ary adds and muls for now. This should be extended
// and generalized.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
#include <cstdint>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "nary-reassociate"

namespace {

class NaryReassociateLegacyPass : public FunctionPass {
public:
  static char ID;

  NaryReassociateLegacyPass() : FunctionPass(ID) {
    initializeNaryReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool doInitialization(Module &M) override {
    return false;
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<ScalarEvolutionWrapperPass>();
    AU.addPreserved<TargetLibraryInfoWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }

private:
  NaryReassociatePass Impl;
};

} // end anonymous namespace

char NaryReassociateLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate",
                      "Nary reassociation", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(NaryReassociateLegacyPass, "nary-reassociate",
                    "Nary reassociation", false, false)

FunctionPass *llvm::createNaryReassociatePass() {
  return new NaryReassociateLegacyPass();
}

bool NaryReassociateLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  return Impl.runImpl(F, AC, DT, SE, TLI, TTI);
}

PreservedAnalyses NaryReassociatePass::run(Function &F,
                                           FunctionAnalysisManager &AM) {
  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
  auto *TTI = &AM.getResult<TargetIRAnalysis>(F);

  if (!runImpl(F, AC, DT, SE, TLI, TTI))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<ScalarEvolutionAnalysis>();
  return PA;
}

bool NaryReassociatePass::runImpl(Function &F, AssumptionCache *AC_,
                                  DominatorTree *DT_, ScalarEvolution *SE_,
                                  TargetLibraryInfo *TLI_,
                                  TargetTransformInfo *TTI_) {
  AC = AC_;
  DT = DT_;
  SE = SE_;
  TLI = TLI_;
  TTI = TTI_;
  DL = &F.getDataLayout();

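  // Iterate to a fixed point: each call to doOneIteration() may delete
  // instructions and turn multi-use operands into single-use ones, exposing
  // further reassociation opportunities in the next iteration (see the file
  // header comment).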
  bool Changed = false, ChangedInThisIteration;
  do {
    ChangedInThisIteration = doOneIteration(F);
    Changed |= ChangedInThisIteration;
  } while (ChangedInThisIteration);
  return Changed;
}

bool NaryReassociatePass::doOneIteration(Function &F) {
  bool Changed = false;
  SeenExprs.clear();
  // Process the basic blocks in a depth first traversal of the dominator
  // tree. This order ensures that all bases of a candidate are in Candidates
  // when we process it.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  for (const auto Node : depth_first(DT)) {
    BasicBlock *BB = Node->getBlock();
    for (Instruction &OrigI : *BB) {
      const SCEV *OrigSCEV = nullptr;
      if (Instruction *NewI = tryReassociate(&OrigI, OrigSCEV)) {
        Changed = true;
        OrigI.replaceAllUsesWith(NewI);

        // Add 'OrigI' to the list of dead instructions.
        DeadInsts.push_back(WeakTrackingVH(&OrigI));
        // Add the rewritten instruction to SeenExprs; the original instruction
        // will be deleted.
        const SCEV *NewSCEV = SE->getSCEV(NewI);
        SeenExprs[NewSCEV].push_back(WeakTrackingVH(NewI));

        // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
        // is equivalent to I. However, ScalarEvolution::getSCEV may
        // weaken nsw, causing NewSCEV not to equal OldSCEV. For example,
        // suppose we reassociate
        //   I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
        // to
        //   NewI = &a[sext(i)] + sext(j).
        //
        // ScalarEvolution computes
        //   getSCEV(I)    = a + 4 * sext(i + j)
        //   getSCEV(NewI) = a + 4 * sext(i) + 4 * sext(j)
        // which are different SCEVs.
        //
        // To alleviate this issue of ScalarEvolution not always capturing
        // equivalence, we add I to SeenExprs[OldSCEV] as well, so that we can
        // map both the SCEVs before and after tryReassociate(I) to I.
        //
        // This improvement is exercised in @reassociate_gep_nsw in
        // nary-gep.ll.
        if (NewSCEV != OrigSCEV)
          SeenExprs[OrigSCEV].push_back(WeakTrackingVH(NewI));
      } else if (OrigSCEV)
        SeenExprs[OrigSCEV].push_back(WeakTrackingVH(&OrigI));
    }
  }
  // Delete all dead instructions from 'DeadInsts'.
  // Please note ScalarEvolution is updated along the way.
  RecursivelyDeleteTriviallyDeadInstructionsPermissive(
      DeadInsts, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); });

  return Changed;
}

template <typename PredT>
Instruction *
NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I,
                                                 const SCEV *&OrigSCEV) {
  Value *LHS = nullptr;
  Value *RHS = nullptr;

  auto MinMaxMatcher =
      MaxMin_match<ICmpInst, bind_ty<Value>, bind_ty<Value>, PredT>(
          m_Value(LHS), m_Value(RHS));
  if (match(I, MinMaxMatcher)) {
    OrigSCEV = SE->getSCEV(I);
    if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
            tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS)))
      return NewMinMax;
    if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
            tryReassociateMinOrMax(I, MinMaxMatcher, RHS, LHS)))
      return NewMinMax;
  }
  return nullptr;
}

Instruction *NaryReassociatePass::tryReassociate(Instruction *I,
                                                 const SCEV *&OrigSCEV) {
  if (!SE->isSCEVable(I->getType()))
    return nullptr;

  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Mul:
    OrigSCEV = SE->getSCEV(I);
    return tryReassociateBinaryOp(cast<BinaryOperator>(I));
  case Instruction::GetElementPtr:
    OrigSCEV = SE->getSCEV(I);
    return tryReassociateGEP(cast<GetElementPtrInst>(I));
  default:
    break;
  }

  // Try to match signed/unsigned Min/Max.
  Instruction *ResI = nullptr;
  // TODO: Currently min/max reassociation is restricted to integer types only,
  // due to the use of SCEVExpander, which may introduce incompatible forms of
  // min/max for pointer types.
  if (I->getType()->isIntegerTy())
    if ((ResI = matchAndReassociateMinOrMax<umin_pred_ty>(I, OrigSCEV)) ||
        (ResI = matchAndReassociateMinOrMax<smin_pred_ty>(I, OrigSCEV)) ||
        (ResI = matchAndReassociateMinOrMax<umax_pred_ty>(I, OrigSCEV)) ||
        (ResI = matchAndReassociateMinOrMax<smax_pred_ty>(I, OrigSCEV)))
      return ResI;

  return nullptr;
}

static bool isGEPFoldable(GetElementPtrInst *GEP,
                          const TargetTransformInfo *TTI) {
  SmallVector<const Value *, 4> Indices(GEP->indices());
  return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
                         Indices) == TargetTransformInfo::TCC_Free;
}

Instruction *NaryReassociatePass::tryReassociateGEP(GetElementPtrInst *GEP) {
  // Not worth reassociating GEP if it is foldable.
  if (isGEPFoldable(GEP, TTI))
    return nullptr;

  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I - 1,
                                                  GTI.getIndexedType())) {
        return NewGEP;
      }
    }
  }
  return nullptr;
}

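// Returns whether Index is narrower than the pointer index width of GEP's
// address space and would therefore have to be sign-extended before being used
// as an index.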
bool NaryReassociatePass::requiresSignExtension(Value *Index,
                                                GetElementPtrInst *GEP) {
  unsigned IndexSizeInBits =
      DL->getIndexSizeInBits(GEP->getType()->getPointerAddressSpace());
  return cast<IntegerType>(Index->getType())->getBitWidth() < IndexSizeInBits;
}

GetElementPtrInst *
NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
                                              unsigned I, Type *IndexedType) {
  SimplifyQuery SQ(*DL, DT, AC, GEP);
  Value *IndexToSplit = GEP->getOperand(I + 1);
  if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
    IndexToSplit = SExt->getOperand(0);
  } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
    // zext can be treated as sext if the source is non-negative.
    if (isKnownNonNegative(ZExt->getOperand(0), SQ))
      IndexToSplit = ZExt->getOperand(0);
  }

  if (AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
    // If the I-th index needs sext and the underlying add is not equipped with
    // nsw, we cannot split the add because
    //   sext(LHS + RHS) != sext(LHS) + sext(RHS).
    if (requiresSignExtension(IndexToSplit, GEP) &&
        computeOverflowForSignedAdd(AO, SQ) != OverflowResult::NeverOverflows)
      return nullptr;

    Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
    // IndexToSplit = LHS + RHS.
    if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
      return NewGEP;
    // Symmetrically, try IndexToSplit = RHS + LHS.
    if (LHS != RHS) {
      if (auto *NewGEP =
              tryReassociateGEPAtIndex(GEP, I, RHS, LHS, IndexedType))
        return NewGEP;
    }
  }
  return nullptr;
}
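
// For example (a sketch of the intended rewrite, not taken from a real test
// case): given GEP = &a[i + j], if some dominating instruction already
// computes &a[i], the overload below finds it through its SCEV and rebuilds
// GEP as a GEP off that candidate indexed by j (scaled to the element size),
// i.e. NewGEP = &(&a[i])[j].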
393
395NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
396 unsigned I, Value *LHS,
397 Value *RHS, Type *IndexedType) {
398 // Look for GEP's closest dominator that has the same SCEV as GEP except that
399 // the I-th index is replaced with LHS.
401 for (Use &Index : GEP->indices())
402 IndexExprs.push_back(SE->getSCEV(Index));
403 // Replace the I-th index with LHS.
404 IndexExprs[I] = SE->getSCEV(LHS);
405 if (isKnownNonNegative(LHS, SimplifyQuery(*DL, DT, AC, GEP)) &&
407 DL->getTypeSizeInBits(GEP->getOperand(I)->getType())
408 .getFixedValue()) {
409 // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
410 // zext if the source operand is proved non-negative. We should do that
411 // consistently so that CandidateExpr more likely appears before. See
412 // @reassociate_gep_assume for an example of this canonicalization.
413 IndexExprs[I] =
414 SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType());
415 }
416 const SCEV *CandidateExpr = SE->getGEPExpr(cast<GEPOperator>(GEP),
417 IndexExprs);
418
419 Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
420 if (Candidate == nullptr)
421 return nullptr;
422
423 IRBuilder<> Builder(GEP);
424 // Candidate should have the same pointer type as GEP.
425 assert(Candidate->getType() == GEP->getType());
426
427 // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
428 uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
429 Type *ElementType = GEP->getResultElementType();
430 uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
431 // Another less rare case: because I is not necessarily the last index of the
432 // GEP, the size of the type at the I-th index (IndexedSize) is not
433 // necessarily divisible by ElementSize. For example,
434 //
435 // #pragma pack(1)
436 // struct S {
437 // int a[3];
438 // int64 b[8];
439 // };
440 // #pragma pack()
441 //
442 // sizeof(S) = 100 is indivisible by sizeof(int64) = 8.
443 //
444 // TODO: bail out on this case for now. We could emit uglygep.
445 if (IndexedSize % ElementSize != 0)
446 return nullptr;
447
448 // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
449 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
450 if (RHS->getType() != PtrIdxTy)
451 RHS = Builder.CreateSExtOrTrunc(RHS, PtrIdxTy);
452 if (IndexedSize != ElementSize) {
453 RHS = Builder.CreateMul(
454 RHS, ConstantInt::get(PtrIdxTy, IndexedSize / ElementSize));
455 }
456 GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(
457 Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS));
458 NewGEP->setIsInBounds(GEP->isInBounds());
459 NewGEP->takeName(GEP);
460 return NewGEP;
461}
462
Instruction *NaryReassociatePass::tryReassociateBinaryOp(BinaryOperator *I) {
  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
  // There is no need to reassociate 0.
  if (SE->getSCEV(I)->isZero())
    return nullptr;
  if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
    return NewI;
  if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
    return NewI;
  return nullptr;
}

Instruction *NaryReassociatePass::tryReassociateBinaryOp(Value *LHS, Value *RHS,
                                                          BinaryOperator *I) {
  Value *A = nullptr, *B = nullptr;
  // To be conservative, we reassociate I only when it is the only user of
  // (A op B).
  if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
    // I = (A op B) op RHS
    //   = (A op RHS) op B or (B op RHS) op A
    const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
    const SCEV *RHSExpr = SE->getSCEV(RHS);
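    // If the operand being kept folds to the same SCEV as RHS, the
    // reassociated form would just recompute I, so only try the rewrite when
    // the two SCEVs differ (and symmetrically for the other operand).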
    if (BExpr != RHSExpr) {
      if (auto *NewI =
              tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
        return NewI;
    }
    if (AExpr != RHSExpr) {
      if (auto *NewI =
              tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
        return NewI;
    }
  }
  return nullptr;
}

Instruction *NaryReassociatePass::tryReassociatedBinaryOp(const SCEV *LHSExpr,
                                                          Value *RHS,
                                                          BinaryOperator *I) {
  // Look for the closest dominator LHS of I that computes LHSExpr, and replace
  // I with LHS op RHS.
  auto *LHS = findClosestMatchingDominator(LHSExpr, I);
  if (LHS == nullptr)
    return nullptr;

  Instruction *NewI = nullptr;
  switch (I->getOpcode()) {
  case Instruction::Add:
    NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I->getIterator());
    break;
  case Instruction::Mul:
    NewI = BinaryOperator::CreateMul(LHS, RHS, "", I->getIterator());
    break;
  default:
    llvm_unreachable("Unexpected instruction.");
  }
  NewI->setDebugLoc(I->getDebugLoc());
  NewI->takeName(I);
  return NewI;
}

bool NaryReassociatePass::matchTernaryOp(BinaryOperator *I, Value *V,
                                         Value *&Op1, Value *&Op2) {
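  // Match V as a binary operation of the same opcode as I (add or mul),
  // binding its two operands to Op1 and Op2.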
  switch (I->getOpcode()) {
  case Instruction::Add:
    return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
  case Instruction::Mul:
    return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
  default:
    llvm_unreachable("Unexpected instruction.");
  }
  return false;
}

const SCEV *NaryReassociatePass::getBinarySCEV(BinaryOperator *I,
                                               const SCEV *LHS,
                                               const SCEV *RHS) {
  switch (I->getOpcode()) {
  case Instruction::Add:
    return SE->getAddExpr(LHS, RHS);
  case Instruction::Mul:
    return SE->getMulExpr(LHS, RHS);
  default:
    llvm_unreachable("Unexpected instruction.");
  }
  return nullptr;
}

Instruction *
NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
                                                  Instruction *Dominatee) {
  auto Pos = SeenExprs.find(CandidateExpr);
  if (Pos == SeenExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
  while (!Candidates.empty()) {
    // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's
    // removed during rewriting.
    if (Value *Candidate = Candidates.pop_back_val()) {
      Instruction *CandidateInstruction = cast<Instruction>(Candidate);
      if (!DT->dominates(CandidateInstruction, Dominatee))
        continue;

      // Make sure that the instruction is safe to reuse without introducing
      // poison.
      SmallVector<Instruction *> DropPoisonGeneratingInsts;
      if (!SE->canReuseInstruction(CandidateExpr, CandidateInstruction,
                                   DropPoisonGeneratingInsts))
        continue;

      for (Instruction *I : DropPoisonGeneratingInsts)
        I->dropPoisonGeneratingAnnotations();

      return CandidateInstruction;
    }
  }
  return nullptr;
}

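// Maps a PatternMatch min/max predicate type (smax/umax/smin/umin) to the
// corresponding SCEV expression kind.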
template <typename MaxMinT> static SCEVTypes convertToSCEVype(MaxMinT &MM) {
  if (std::is_same_v<smax_pred_ty, typename MaxMinT::PredType>)
    return scSMaxExpr;
  else if (std::is_same_v<umax_pred_ty, typename MaxMinT::PredType>)
    return scUMaxExpr;
  else if (std::is_same_v<smin_pred_ty, typename MaxMinT::PredType>)
    return scSMinExpr;
  else if (std::is_same_v<umin_pred_ty, typename MaxMinT::PredType>)
    return scUMinExpr;

  llvm_unreachable("Can't convert MinMax pattern to SCEV type");
  return scUnknown;
}

// Parameters:
//  I - instruction matched by MaxMinMatch matcher
//  MaxMinMatch - min/max idiom matcher
//  LHS - first operand of I
//  RHS - second operand of I
template <typename MaxMinT>
Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
                                                   MaxMinT MaxMinMatch,
                                                   Value *LHS, Value *RHS) {
  Value *A = nullptr, *B = nullptr;
  MaxMinT m_MaxMin(m_Value(A), m_Value(B));

  if (LHS->hasNUsesOrMore(3) ||
      // The optimization is profitable only if LHS can be removed in the end.
      // In other words, LHS should be used (directly or indirectly) by I only.
      llvm::any_of(LHS->users(),
                   [&](auto *U) {
                     return U != I &&
                            !(U->hasOneUser() && *U->users().begin() == I);
                   }) ||
      !match(LHS, m_MaxMin))
    return nullptr;

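  // tryCombination(A, AExpr, B, BExpr, C, CExpr): assuming I computes
  // min/max(min/max(A, B), C) up to operand order, look for a dominating
  // instruction that already computes min/max(A, B); if one exists, rebuild I
  // as min/max(<that instruction>, C) via SCEVExpander.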
  auto tryCombination = [&](Value *A, const SCEV *AExpr, Value *B,
                            const SCEV *BExpr, Value *C,
                            const SCEV *CExpr) -> Value * {
    SmallVector<const SCEV *, 2> Ops1{BExpr, AExpr};
    const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin);
    const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);

    Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I);

    if (!R1MinMax)
      return nullptr;

    LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax << "\n");

    SmallVector<const SCEV *, 2> Ops2 = {CExpr,
                                         SE->getUnknown(R1MinMax)};
    const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);

    SCEVExpander Expander(*SE, *DL, "nary-reassociate");
    Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
    NewMinMax->setName(Twine(I->getName()).concat(".nary"));

    LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
                      << "NARY: Inserting: " << *NewMinMax << "\n");
    return NewMinMax;
  };

  const SCEV *AExpr = SE->getSCEV(A);
  const SCEV *BExpr = SE->getSCEV(B);
  const SCEV *RHSExpr = SE->getSCEV(RHS);

  if (BExpr != RHSExpr) {
    // Try (A op RHS) op B
    if (auto *NewMinMax = tryCombination(A, AExpr, RHS, RHSExpr, B, BExpr))
      return NewMinMax;
  }

  if (AExpr != RHSExpr) {
    // Try (RHS op B) op A
    if (auto *NewMinMax = tryCombination(RHS, RHSExpr, B, BExpr, A, AExpr))
      return NewMinMax;
  }

  return nullptr;
}