doxygen/NaryReassociate_8cpp_source.html

//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This pass reassociates n-ary add expressions and eliminates the redundancy

// exposed by the reassociation.

//

// A motivating example:

//

//   void foo(int a, int b) {

//     bar(a + b);

//     bar((a + 2) + b);

//   }

//

// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify

// the above code to

//

//   int t = a + b;

//   bar(t);

//   bar(t + 2);

//

// However, the Reassociate pass is unable to do that because it processes each

// instruction individually and believes (a + 2) + b is the best form according

// to its rank system.

//

// To address this limitation, NaryReassociate reassociates an expression in a

// form that reuses existing instructions. As a result, NaryReassociate can

// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that

// (a + b) is computed before.

//

// NaryReassociate works as follows. For every instruction in the form of (a +

// b) + c, it checks whether a + c or b + c is already computed by a dominating

// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +

// c) + a and removes the redundancy accordingly. To efficiently look up whether

// an expression is computed before, we store each instruction seen and its SCEV

// into an SCEV-to-instruction map.

//

// Although the algorithm pattern-matches only ternary additions, it

// automatically handles many >3-ary expressions by walking through the function

// in the depth-first order. For example, given

//

//   (a + c) + d

//   ((a + b) + c) + d

//

// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites

// ((a + c) + b) + d into ((a + c) + d) + b.

//

// Finally, the above dominator-based algorithm may need to be run multiple

// iterations before emitting optimal code. One source of this need is that we

// only split an operand when it is used only once. The above algorithm can

// eliminate an instruction and decrease the usage count of its operands. As a

// result, an instruction that previously had multiple uses may become a

// single-use instruction and thus eligible for split consideration. For

// example,

//

//   ac = a + c

//   ab = a + b

//   abc = ab + c

//   ab2 = ab + b

//   ab2c = ab2 + c

//

// In the first iteration, we cannot reassociate abc to ac+b because ab is used

// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a

// result, ab2 becomes dead and ab will be used only once in the second

// iteration.

//

// Limitations and TODO items:

//

// 1) We only consider n-ary adds and muls for now. This should be extended

// and generalized.

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/Scalar/NaryReassociate.h"

#include "llvm/ADT/DepthFirstIterator.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/GetElementPtrTypeIterator.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/Operator.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Value.h"

#include "llvm/IR/ValueHandle.h"

#include "llvm/InitializePasses.h"

#include "llvm/Pass.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Transforms/Scalar.h"

#include "llvm/Transforms/Utils/Local.h"

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#include <cassert>

#include <cstdint>


using namespace llvm;

using namespace PatternMatch;


#define DEBUG_TYPE "nary-reassociate"


namespace {


class NaryReassociateLegacyPass : public FunctionPass {

public:

  static char ID;


  NaryReassociateLegacyPass() : FunctionPass(ID) {

    initializeNaryReassociateLegacyPassPass(*PassRegistry::getPassRegistry());

  }


  bool doInitialization(Module &M) override {

    return false;

  }


  bool runOnFunction(Function &F) override;


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.addPreserved<DominatorTreeWrapperPass>();

    AU.addPreserved<ScalarEvolutionWrapperPass>();

    AU.addPreserved<TargetLibraryInfoWrapperPass>();

    AU.addRequired<AssumptionCacheTracker>();

    AU.addRequired<DominatorTreeWrapperPass>();

    AU.addRequired<ScalarEvolutionWrapperPass>();

    AU.addRequired<TargetLibraryInfoWrapperPass>();

    AU.addRequired<TargetTransformInfoWrapperPass>();

    AU.setPreservesCFG();

  }


private:

  NaryReassociatePass Impl;

};


} // end anonymous namespace


// Definition of the static pass identifier; the constructor of
// NaryReassociateLegacyPass hands it to FunctionPass(ID).
char NaryReassociateLegacyPass::ID = 0;

// Register the legacy pass with the argument string "nary-reassociate" and the
// descriptive name "Nary reassociation". The dependency list mirrors the
// addRequired<> calls in NaryReassociateLegacyPass::getAnalysisUsage. The two
// trailing `false` values are the macros' `cfg` and `analysis` arguments.
INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate",
                      "Nary reassociation", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(NaryReassociateLegacyPass, "nary-reassociate",
                    "Nary reassociation", false, false)


/// Factory for the legacy pass; returns a freshly allocated
/// NaryReassociateLegacyPass as a FunctionPass pointer.
FunctionPass *llvm::createNaryReassociatePass() {
  auto *LegacyPass = new NaryReassociateLegacyPass();
  return LegacyPass;
}


/// Legacy entry point: unless the function is skipped, fetch every required
/// analysis for \p F and delegate to NaryReassociatePass::runImpl.
/// \returns whatever runImpl reports, or false when the function is skipped.
bool NaryReassociateLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto &Assumptions =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &DomTree = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ScalarEvo = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto &TargetInfo = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  return Impl.runImpl(F, &Assumptions, &DomTree, &ScalarEvo, &LibInfo,
                      &TargetInfo);
}


/// New-pass-manager entry point. Pulls the analyses out of \p AM, runs the
/// transformation, and reports which analyses survive: everything when nothing
/// changed, otherwise the CFG analyses plus ScalarEvolution.
PreservedAnalyses NaryReassociatePass::run(Function &F,
                                           FunctionAnalysisManager &AM) {
  auto &Assumptions = AM.getResult<AssumptionAnalysis>(F);
  auto &DomTree = AM.getResult<DominatorTreeAnalysis>(F);
  auto &ScalarEvo = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &LibInfo = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TargetInfo = AM.getResult<TargetIRAnalysis>(F);

  const bool Modified =
      runImpl(F, &Assumptions, &DomTree, &ScalarEvo, &LibInfo, &TargetInfo);
  if (Modified) {
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    Preserved.preserve<ScalarEvolutionAnalysis>();
    return Preserved;
  }

  return PreservedAnalyses::all();
}


/// Shared driver for both pass managers. Caches the supplied analyses and the
/// function's data layout in the pass object, then keeps calling
/// doOneIteration until an iteration makes no change.
/// \returns true if any iteration changed \p F.
bool NaryReassociatePass::runImpl(Function &F, AssumptionCache *AC_,
                                  DominatorTree *DT_, ScalarEvolution *SE_,
                                  TargetLibraryInfo *TLI_,
                                  TargetTransformInfo *TTI_) {
  AC = AC_;
  DT = DT_;
  SE = SE_;
  TLI = TLI_;
  TTI = TTI_;
  DL = &F.getDataLayout();

  // At least one iteration always runs; each successful one triggers another.
  bool EverChanged = false;
  while (doOneIteration(F))
    EverChanged = true;
  return EverChanged;
}


/// Performs a single sweep over the function, visiting basic blocks in a
/// depth-first walk of the dominator tree and trying to reassociate every
/// instruction against the expressions recorded so far in SeenExprs.
/// \returns true if at least one instruction was rewritten.
bool NaryReassociatePass::doOneIteration(Function &F) {
  bool MadeChange = false;
  SeenExprs.clear();

  // Instructions that were replaced; they are erased in bulk after the walk.
  SmallVector<WeakTrackingVH, 16> Replaced;

  for (const auto DomNode : depth_first(DT)) {
    for (Instruction &Inst : *DomNode->getBlock()) {
      const SCEV *OldExpr = nullptr;
      Instruction *Rewritten = tryReassociate(&Inst, OldExpr);

      if (!Rewritten) {
        // Not rewritten: remember the instruction under its SCEV, provided
        // tryReassociate computed one.
        if (OldExpr)
          SeenExprs[OldExpr].push_back(WeakTrackingVH(&Inst));
        continue;
      }

      MadeChange = true;
      Inst.replaceAllUsesWith(Rewritten);
      Replaced.push_back(WeakTrackingVH(&Inst));

      // Record the replacement (not the soon-to-be-deleted original) under
      // the SCEV of the new instruction.
      const SCEV *NewExpr = SE->getSCEV(Rewritten);
      SeenExprs[NewExpr].push_back(WeakTrackingVH(Rewritten));

      // Ideally NewExpr would equal OldExpr because the rewrite is equivalent
      // to the original. However, ScalarEvolution::getSCEV may weaken nsw, so
      // the two can differ. For example, reassociating
      //   I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
      // into
      //   NewI = &a[sext(i)] + sext(j)
      // yields
      //   getSCEV(I)    = a + 4 * sext(i + j)
      //   getSCEV(NewI) = a + 4 * sext(i) + 4 * sext(j)
      // Registering the replacement under the old SCEV as well lets later
      // lookups with either form find it. This is exercised by
      // @reassociate_gep_nsw in nary-gep.ll.
      if (NewExpr != OldExpr)
        SeenExprs[OldExpr].push_back(WeakTrackingVH(Rewritten));
    }
  }

  // Erase the replaced instructions; the callback makes ScalarEvolution
  // forget each value that gets deleted.
  RecursivelyDeleteTriviallyDeadInstructionsPermissive(
      Replaced, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); });

  return MadeChange;
}


/// Checks whether \p I is a min/max of the flavour selected by \p PredT and,
/// if so, stores its SCEV in \p OrigSCEV and attempts reassociation with the
/// operands in both orders.
/// \returns the replacement instruction, or nullptr if \p I does not match or
/// neither attempt produced an Instruction.
template <typename PredT>
Instruction *
NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I,
                                                 const SCEV *&OrigSCEV) {
  Value *LHS = nullptr;
  Value *RHS = nullptr;

  auto MinMaxMatcher =
      MaxMin_match<ICmpInst, bind_ty<Value>, bind_ty<Value>, PredT>(
          m_Value(LHS), m_Value(RHS));
  if (!match(I, MinMaxMatcher))
    return nullptr;

  OrigSCEV = SE->getSCEV(I);

  // First try with the operands as matched ...
  Value *Attempt = tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS);
  if (auto *NewMinMax = dyn_cast_or_null<Instruction>(Attempt))
    return NewMinMax;

  // ... then with them swapped.
  Attempt = tryReassociateMinOrMax(I, MinMaxMatcher, RHS, LHS);
  return dyn_cast_or_null<Instruction>(Attempt);
}


/// Dispatches \p I to the matching reassociation routine: add/mul, GEP, or
/// (for integer-typed values) one of the four min/max flavours. Whenever a
/// routine is attempted, the SCEV of \p I is written to \p OrigSCEV.
/// \returns the replacement instruction, or nullptr if nothing was rewritten.
Instruction *NaryReassociatePass::tryReassociate(Instruction *I,
                                                 const SCEV *&OrigSCEV) {
  if (!SE->isSCEVable(I->getType()))
    return nullptr;

  const unsigned Opcode = I->getOpcode();
  if (Opcode == Instruction::Add || Opcode == Instruction::Mul) {
    OrigSCEV = SE->getSCEV(I);
    return tryReassociateBinaryOp(cast<BinaryOperator>(I));
  }
  if (Opcode == Instruction::GetElementPtr) {
    OrigSCEV = SE->getSCEV(I);
    return tryReassociateGEP(cast<GetElementPtrInst>(I));
  }

  // TODO: Currently min/max reassociation is restricted to integer types only
  // due to use of SCEVExpander which may introduce incompatible forms of
  // min/max for pointer types.
  if (!I->getType()->isIntegerTy())
    return nullptr;

  // Try the signed/unsigned min/max flavours one after another and stop at the
  // first that yields a replacement.
  if (Instruction *NewI = matchAndReassociateMinOrMax<umin_pred_ty>(I, OrigSCEV))
    return NewI;
  if (Instruction *NewI = matchAndReassociateMinOrMax<smin_pred_ty>(I, OrigSCEV))
    return NewI;
  if (Instruction *NewI = matchAndReassociateMinOrMax<umax_pred_ty>(I, OrigSCEV))
    return NewI;
  return matchAndReassociateMinOrMax<smax_pred_ty>(I, OrigSCEV);
}


/// Returns true when the target cost model rates \p GEP as free
/// (TargetTransformInfo::TCC_Free).
static bool isGEPFoldable(GetElementPtrInst *GEP,
                          const TargetTransformInfo *TTI) {
  SmallVector<const Value *, 4> IndexOperands(GEP->indices());
  Type *SourceTy = GEP->getSourceElementType();
  const Value *BasePtr = GEP->getPointerOperand();
  return TTI->getGEPCost(SourceTy, BasePtr, IndexOperands) ==
         TargetTransformInfo::TCC_Free;
}


/// Tries to reassociate \p GEP at each of its sequential indices in turn.
/// \returns the first replacement GEP produced, or nullptr if the GEP is
/// already free according to the target or no index could be reassociated.
Instruction *NaryReassociatePass::tryReassociateGEP(GetElementPtrInst *GEP) {
  // A GEP the target folds for free gains nothing from reassociation.
  if (isGEPFoldable(GEP, TTI))
    return nullptr;

  gep_type_iterator TypeIt = gep_type_begin(*GEP);
  const unsigned NumOperands = GEP->getNumOperands();
  // Operand 0 is skipped; operand OpIdx corresponds to index OpIdx - 1.
  for (unsigned OpIdx = 1; OpIdx != NumOperands; ++OpIdx, ++TypeIt) {
    if (!TypeIt.isSequential())
      continue;
    if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, OpIdx - 1,
                                                TypeIt.getIndexedType()))
      return NewGEP;
  }
  return nullptr;
}


/// Returns true if the integer type of \p Index is narrower than the index
/// width the data layout reports for the address space of \p GEP's type.
bool NaryReassociatePass::requiresSignExtension(Value *Index,
                                                GetElementPtrInst *GEP) {
  const unsigned AddrSpace = GEP->getType()->getPointerAddressSpace();
  const unsigned TargetIndexBits = DL->getIndexSizeInBits(AddrSpace);
  const unsigned IndexBits =
      cast<IntegerType>(Index->getType())->getBitWidth();
  return IndexBits < TargetIndexBits;
}


/// Tries to reassociate \p GEP at its \p I-th index (operand I + 1). The index
/// is looked through a sext, or a zext of a known non-negative value; if what
/// remains is an add, both operand orders are handed to the five-argument
/// overload.
/// \returns the replacement GEP, or nullptr if the index cannot be split or no
/// rewrite was found.
GetElementPtrInst *
NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
                                              unsigned I, Type *IndexedType) {
  SimplifyQuery Query(*DL, DT, AC, GEP);

  Value *Splittable = GEP->getOperand(I + 1);
  if (auto *SignExt = dyn_cast<SExtInst>(Splittable)) {
    Splittable = SignExt->getOperand(0);
  } else if (auto *ZeroExt = dyn_cast<ZExtInst>(Splittable)) {
    // zext can be treated as sext if the source is non-negative.
    Value *Narrow = ZeroExt->getOperand(0);
    if (isKnownNonNegative(Narrow, Query))
      Splittable = Narrow;
  }

  auto *Sum = dyn_cast<AddOperator>(Splittable);
  if (!Sum)
    return nullptr;

  // If the index has to be sign-extended, the add may only be split when it
  // is known not to overflow, because in general
  //   sext(LHS + RHS) != sext(LHS) + sext(RHS).
  if (requiresSignExtension(Splittable, GEP) &&
      computeOverflowForSignedAdd(Sum, Query) != OverflowResult::NeverOverflows)
    return nullptr;

  Value *First = Sum->getOperand(0);
  Value *Second = Sum->getOperand(1);

  // Index = First + Second.
  if (auto *NewGEP =
          tryReassociateGEPAtIndex(GEP, I, First, Second, IndexedType))
    return NewGEP;

  // Symmetrically, Index = Second + First; pointless when both are the same.
  if (First == Second)
    return nullptr;
  return tryReassociateGEPAtIndex(GEP, I, Second, First, IndexedType);
}


/// Tries to rewrite \p GEP, whose \p I-th index is LHS + RHS, as
///   &Candidate[RHS * (sizeof(IndexedType) / sizeof(result element))]
/// where Candidate is a dominating instruction whose SCEV equals that of
/// \p GEP with the \p I-th index replaced by \p LHS.
///
/// \param GEP         the GEP to rewrite; the new GEP is inserted before it.
/// \param I           position of the split index within GEP->indices().
/// \param LHS         the addend kept inside the candidate expression.
/// \param RHS         the addend applied on top of the candidate.
/// \param IndexedType the type stepped over by the \p I-th index.
/// \returns the new GEP (which takes over GEP's name and inbounds flag), or
/// nullptr if no candidate exists or the element sizes do not allow the
/// rewrite.
GetElementPtrInst *
NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
                                              unsigned I, Value *LHS,
                                              Value *RHS, Type *IndexedType) {
  // Look for GEP's closest dominator that has the same SCEV as GEP except that
  // the I-th index is replaced with LHS.
  SmallVector<const SCEV *, 4> IndexExprs;
  for (Use &Index : GEP->indices())
    IndexExprs.push_back(SE->getSCEV(Index));
  // Replace the I-th index with LHS.
  IndexExprs[I] = SE->getSCEV(LHS);
  // NOTE(review): I indexes GEP->indices(), while the three-argument overload
  // reads that same index through GEP->getOperand(I + 1). The two
  // GEP->getOperand(I) uses below therefore look at the preceding operand;
  // confirm this is intended before changing it.
  if (isKnownNonNegative(LHS, SimplifyQuery(*DL, DT, AC, GEP)) &&
      DL->getTypeSizeInBits(LHS->getType()).getFixedValue() <
          DL->getTypeSizeInBits(GEP->getOperand(I)->getType())
              .getFixedValue()) {
    // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
    // zext if the source operand is proved non-negative. We should do that
    // consistently so that CandidateExpr more likely appears before. See
    // @reassociate_gep_assume for an example of this canonicalization.
    IndexExprs[I] =
        SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType());
  }
  const SCEV *CandidateExpr = SE->getGEPExpr(cast<GEPOperator>(GEP),
                                             IndexExprs);

  Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
  if (Candidate == nullptr)
    return nullptr;

  IRBuilder<> Builder(GEP);
  // Candidate should have the same pointer type as GEP.
  assert(Candidate->getType() == GEP->getType());

  // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
  uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
  Type *ElementType = GEP->getResultElementType();
  uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
  // Because I is not necessarily the last index of the GEP, the size of the
  // type at the I-th index (IndexedSize) is not necessarily divisible by
  // ElementSize. For example,
  //
  // #pragma pack(1)
  // struct S {
  //   int a[3];
  //   int64 b[8];
  // };
  // #pragma pack()
  //
  // sizeof(S) = 100 is indivisible by sizeof(int64) = 8.
  //
  // A zero ElementSize is rejected as well: the offset cannot be expressed as
  // a multiple of a zero-sized element, and the modulo below would otherwise
  // divide by zero.
  //
  // TODO: bail out on this case for now. We could emit uglygep.
  if (ElementSize == 0 || IndexedSize % ElementSize != 0)
    return nullptr;

  // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0]))];
  Type *PtrIdxTy = DL->getIndexType(GEP->getType());
  if (RHS->getType() != PtrIdxTy)
    RHS = Builder.CreateSExtOrTrunc(RHS, PtrIdxTy);
  if (IndexedSize != ElementSize) {
    RHS = Builder.CreateMul(
        RHS, ConstantInt::get(PtrIdxTy, IndexedSize / ElementSize));
  }
  GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(
      Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS));
  NewGEP->setIsInBounds(GEP->isInBounds());
  NewGEP->takeName(GEP);
  return NewGEP;
}


/// Tries to reassociate the add/mul \p I, treating first its operand 0 and
/// then its operand 1 as the nested (A op B) sub-expression.
/// \returns the replacement instruction, or nullptr if \p I evaluates to a
/// constant zero SCEV or neither operand order could be reassociated.
Instruction *NaryReassociatePass::tryReassociateBinaryOp(BinaryOperator *I) {
  // There is no need to reassociate 0.
  if (SE->getSCEV(I)->isZero())
    return nullptr;

  Value *Op0 = I->getOperand(0);
  Value *Op1 = I->getOperand(1);
  if (Instruction *NewI = tryReassociateBinaryOp(Op0, Op1, I))
    return NewI;
  return tryReassociateBinaryOp(Op1, Op0, I);
}


/// Given I = LHS op RHS, tries to view \p LHS as (A op B) and rewrite \p I as
/// (A op RHS) op B or (B op RHS) op A, reusing an already computed
/// (A op RHS) or (B op RHS).
/// \returns the replacement instruction, or nullptr if \p LHS has more than
/// one use, does not match, or no reusable sub-expression was found.
Instruction *NaryReassociatePass::tryReassociateBinaryOp(Value *LHS, Value *RHS,
                                                         BinaryOperator *I) {
  Value *A = nullptr;
  Value *B = nullptr;
  // To be conservative, only reassociate when I is the sole user of (A op B).
  if (!LHS->hasOneUse() || !matchTernaryOp(I, LHS, A, B))
    return nullptr;

  const SCEV *AExpr = SE->getSCEV(A);
  const SCEV *BExpr = SE->getSCEV(B);
  const SCEV *RHSExpr = SE->getSCEV(RHS);

  // I = (A op B) op RHS = (A op RHS) op B
  if (BExpr != RHSExpr) {
    const SCEV *Combined = getBinarySCEV(I, AExpr, RHSExpr);
    if (Instruction *NewI = tryReassociatedBinaryOp(Combined, B, I))
      return NewI;
  }

  // I = (A op B) op RHS = (B op RHS) op A
  if (AExpr != RHSExpr) {
    const SCEV *Combined = getBinarySCEV(I, BExpr, RHSExpr);
    if (Instruction *NewI = tryReassociatedBinaryOp(Combined, A, I))
      return NewI;
  }

  return nullptr;
}


/// Looks up the closest dominator of \p I that computes \p LHSExpr and, if one
/// exists, creates "dominator op RHS" right before \p I with the opcode of
/// \p I. The new instruction inherits I's debug location and name.
/// \returns the new instruction, or nullptr if no dominator matches.
Instruction *NaryReassociatePass::tryReassociatedBinaryOp(const SCEV *LHSExpr,
                                                          Value *RHS,
                                                          BinaryOperator *I) {
  Instruction *Reused = findClosestMatchingDominator(LHSExpr, I);
  if (!Reused)
    return nullptr;

  Instruction *Replacement = nullptr;
  const unsigned Opcode = I->getOpcode();
  if (Opcode == Instruction::Add)
    Replacement = BinaryOperator::CreateAdd(Reused, RHS, "", I->getIterator());
  else if (Opcode == Instruction::Mul)
    Replacement = BinaryOperator::CreateMul(Reused, RHS, "", I->getIterator());
  else
    llvm_unreachable("Unexpected instruction.");

  Replacement->setDebugLoc(I->getDebugLoc());
  Replacement->takeName(I);
  return Replacement;
}


/// Matches \p V against a binary operation with the same opcode as \p I
/// (add or mul) and, on success, binds its operands to \p Op1 and \p Op2.
/// \returns true if \p V matched.
bool NaryReassociatePass::matchTernaryOp(BinaryOperator *I, Value *V,
                                         Value *&Op1, Value *&Op2) {
  const unsigned Opcode = I->getOpcode();
  if (Opcode == Instruction::Add)
    return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
  if (Opcode == Instruction::Mul)
    return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));

  // Only add and mul are ever passed in.
  llvm_unreachable("Unexpected instruction.");
  return false;
}


/// Builds the SCEV for "LHS op RHS", where op is the opcode of \p I
/// (add or mul).
const SCEV *NaryReassociatePass::getBinarySCEV(BinaryOperator *I,
                                               const SCEV *LHS,
                                               const SCEV *RHS) {
  const unsigned Opcode = I->getOpcode();
  if (Opcode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (Opcode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  // Only add and mul are ever passed in.
  llvm_unreachable("Unexpected instruction.");
  return nullptr;
}


/// Searches the instructions recorded in SeenExprs for \p CandidateExpr,
/// most recently recorded first, for one that dominates \p Dominatee and that
/// ScalarEvolution allows to be reused for the expression.
/// \returns the matching instruction, or nullptr if there is none.
Instruction *
NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
                                                  Instruction *Dominatee) {
  auto Found = SeenExprs.find(CandidateExpr);
  if (Found == SeenExprs.end())
    return nullptr;

  // Because basic blocks are processed in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either, so it is dropped from the stack for good. This
  // makes the algorithm O(n).
  // NOTE(review): every inspected entry is popped, including the one that is
  // eventually returned; confirm that consuming the match is intended.
  auto &Stack = Found->second;
  while (!Stack.empty()) {
    // The stack holds WeakTrackingVHs, so an entry can be null if the
    // instruction was removed during rewriting.
    Value *Top = Stack.pop_back_val();
    if (!Top)
      continue;

    auto *TopInst = cast<Instruction>(Top);
    if (!DT->dominates(TopInst, Dominatee))
      continue;

    // Make sure that the instruction is safe to reuse without introducing
    // poison.
    SmallVector<Instruction *> FlagsToDrop;
    if (!SE->canReuseInstruction(CandidateExpr, TopInst, FlagsToDrop))
      continue;

    for (Instruction *FlaggedInst : FlagsToDrop)
      FlaggedInst->dropPoisonGeneratingAnnotations();

    return TopInst;
  }
  return nullptr;
}


/// Maps the predicate type of a min/max matcher to the corresponding SCEV
/// expression kind (smax, umax, smin or umin). Only the type of \p MM is
/// inspected.
template <typename MaxMinT> static SCEVTypes convertToSCEVype(MaxMinT &MM) {
  using Pred = typename MaxMinT::PredType;

  if constexpr (std::is_same_v<smax_pred_ty, Pred>)
    return scSMaxExpr;
  else if constexpr (std::is_same_v<umax_pred_ty, Pred>)
    return scUMaxExpr;
  else if constexpr (std::is_same_v<smin_pred_ty, Pred>)
    return scSMinExpr;
  else if constexpr (std::is_same_v<umin_pred_ty, Pred>)
    return scUMinExpr;

  llvm_unreachable("Can't convert MinMax pattern to SCEV type");
  return scUnknown;
}


// Tries to rewrite I = minmax(LHS, RHS), where LHS = minmax(A, B) of the same
// flavour, so that it reuses an already computed minmax(A, RHS) or
// minmax(RHS, B).
//
// Parameters:
//  I - instruction matched by MaxMinMatch matcher
//  MaxMinMatch - min/max idiom matcher (only its type is used here)
//  LHS - operand of I that is examined as the nested min/max
//  RHS - the other operand of I
//
// Returns the value computing the reassociated min/max, or nullptr if the
// rewrite does not apply.
template <typename MaxMinT>
Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
                                                   MaxMinT MaxMinMatch,
                                                   Value *LHS, Value *RHS) {
  Value *A = nullptr, *B = nullptr;
  MaxMinT m_MaxMin(m_Value(A), m_Value(B));

  if (LHS->hasNUsesOrMore(3))
    return nullptr;

  // The optimization is profitable only if LHS can be removed in the end.
  // In other words LHS should be used (directly or indirectly) by I only.
  const bool HasForeignUser = llvm::any_of(LHS->users(), [&](auto *U) {
    return U != I && !(U->hasOneUser() && *U->users().begin() == I);
  });
  if (HasForeignUser)
    return nullptr;

  if (!match(LHS, m_MaxMin))
    return nullptr;

  const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin);

  // Looks for a dominating instruction computing minmax(InnerExpr0,
  // InnerExpr1); if one exists, emits minmax(Outer, that instruction) before
  // I and returns it.
  auto emitUsingExisting = [&](const SCEV *InnerExpr0, const SCEV *InnerExpr1,
                               Value *Outer) -> Value * {
    SmallVector<const SCEV *, 2> InnerOps{InnerExpr1, InnerExpr0};
    const SCEV *InnerExpr = SE->getMinMaxExpr(SCEVType, InnerOps);

    Instruction *R1MinMax = findClosestMatchingDominator(InnerExpr, I);
    if (!R1MinMax)
      return nullptr;

    LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax << "\n");

    SmallVector<const SCEV *, 2> OuterOps{SE->getUnknown(Outer),
                                          SE->getUnknown(R1MinMax)};
    const SCEV *OuterExpr = SE->getMinMaxExpr(SCEVType, OuterOps);

    SCEVExpander Expander(*SE, *DL, "nary-reassociate");
    Value *NewMinMax = Expander.expandCodeFor(OuterExpr, I->getType(), I);
    NewMinMax->setName(Twine(I->getName()).concat(".nary"));

    LLVM_DEBUG(dbgs() << "NARY: Deleting:  " << *I << "\n"
                      << "NARY: Inserting: " << *NewMinMax << "\n");
    return NewMinMax;
  };

  const SCEV *AExpr = SE->getSCEV(A);
  const SCEV *BExpr = SE->getSCEV(B);
  const SCEV *RHSExpr = SE->getSCEV(RHS);

  // Try (A op RHS) op B
  if (BExpr != RHSExpr)
    if (Value *NewMinMax = emitUsingExisting(AExpr, RHSExpr, B))
      return NewMinMax;

  // Try (RHS op B) op A
  if (AExpr != RHSExpr)
    if (Value *NewMinMax = emitUsingExisting(RHSExpr, BExpr, A))
      return NewMinMax;

  return nullptr;
}

AssumptionCache.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Casting.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

DataLayout.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition: Debug.h:106

DepthFirstIterator.h
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.

DerivedTypes.h

Dominators.h

GetElementPtrTypeIterator.h

GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:170

IRBuilder.h

BasicBlock.h

Function.h

Instruction.h

Module.h
Module.h This file contains the declarations for the Module class.

Operator.h

Type.h

Value.h

InitializePasses.h

InstrTypes.h

Instructions.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

convertToSCEVype
static SCEVTypes convertToSCEVype(MaxMinT &MM)
Definition: NaryReassociate.cpp:587

isGEPFoldable
static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI)
Definition: NaryReassociate.cpp:328

reassociate
nary reassociate
Definition: NaryReassociate.cpp:162

reassociation
nary Nary reassociation
Definition: NaryReassociate.cpp:163

NaryReassociate.h

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

Pass.h

PatternMatch.h

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

ScalarEvolutionExpander.h

ScalarEvolutionExpressions.h

ScalarEvolution.h

Scalar.h

SmallVector.h
This file defines the SmallVector class.

TargetLibraryInfo.h

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

Local.h

ValueHandle.h

ValueTracking.h

RHS
Value * RHS
Definition: X86PartialReduction.cpp:74

LHS
Value * LHS
Definition: X86PartialReduction.cpp:73

llvm::AddOperator
Definition: Operator.h:405

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98

llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256

llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173

llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:204

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:61

llvm::BinaryOperator
Definition: InstrTypes.h:170

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72

llvm::DataLayout::getIndexType
IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
Definition: DataLayout.cpp:878

llvm::DataLayout::getTypeAllocSize
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457

llvm::DataLayout::getIndexSizeInBits
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:369

llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:617

llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279

llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162

llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310

llvm::FunctionPass::runOnFunction
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.

llvm::Function
Definition: Function.h:63

llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933

llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition: Instructions.cpp:1556

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705

llvm::Instruction
Definition: Instruction.h:68

llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65

llvm::NaryReassociatePass
Definition: NaryReassociate.h:102

llvm::NaryReassociatePass::runImpl
bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, ScalarEvolution *SE_, TargetLibraryInfo *TLI_, TargetTransformInfo *TTI_)
Definition: NaryReassociate.cpp:199

llvm::NaryReassociatePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: NaryReassociate.cpp:182

llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24

llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98

llvm::Pass::doInitialization
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:119

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117

llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146

llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131

llvm::SCEVExpander
This class uses information about analyzed scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:63

llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:71

llvm::SCEV::isZero
bool isZero() const
Return true if the expression is a constant zero.
Definition: ScalarEvolution.cpp:448

llvm::SExtInst
This class represents a sign extension of integer types.
Definition: Instructions.h:4600

llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2320

llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2352

llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:447

llvm::ScalarEvolution::getGEPExpr
const SCEV * getGEPExpr(GEPOperator *GEP, const SmallVectorImpl< const SCEV * > &IndexExprs)
Returns an expression for a GEP.
Definition: ScalarEvolution.cpp:3736

llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4547

llvm::ScalarEvolution::getZeroExtendExpr
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition: ScalarEvolution.cpp:1565

llvm::ScalarEvolution::isSCEVable
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition: ScalarEvolution.cpp:4441

llvm::ScalarEvolution::forgetValue
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
Definition: ScalarEvolution.cpp:8542

llvm::ScalarEvolution::getMinMaxExpr
const SCEV * getMinMaxExpr(SCEVTypes Kind, SmallVectorImpl< const SCEV * > &Operands)
Definition: ScalarEvolution.cpp:3828

llvm::ScalarEvolution::getMulExpr
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:3106

llvm::ScalarEvolution::getUnknown
const SCEV * getUnknown(Value *V)
Definition: ScalarEvolution.cpp:4411

llvm::ScalarEvolution::canReuseInstruction
bool canReuseInstruction(const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
Check whether it is poison-safe to represent the expression S using the instruction I.
Definition: ScalarEvolution.cpp:4168

llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2526

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:413

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:3172

llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:614

llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:639

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:280

llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:3228

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:212

llvm::TargetTransformInfo::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType=nullptr, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
Definition: TargetTransformInfo.cpp:248

llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:289

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81

llvm::Twine::concat
Twine concat(const Twine &Suffix) const
Definition: Twine.h:525

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255

llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377

llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434

llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421

llvm::Value::hasNUsesOrMore
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition: Value.cpp:153

llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383

llvm::WeakTrackingVH
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204

llvm::ZExtInst
This class represents zero extension of integer types.
Definition: Instructions.h:4569

llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202

llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31

llvm::generic_gep_type_iterator::isSequential
bool isSequential() const
Definition: GetElementPtrTypeIterator.h:147

llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:102

uint64_t

unsigned

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

false
Definition: StackSlotColoring.cpp:193

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24

llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1102

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49

llvm::PatternMatch::m_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1168

llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92

llvm::dwarf::Index
Index
Definition: Dwarf.h:882

llvm::dxil::ElementType
ElementType
The element type of an SRV or UAV resource.
Definition: DXILABI.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::OverflowResult::NeverOverflows
@ NeverOverflows
Never overflows.

llvm::createNaryReassociatePass
FunctionPass * createNaryReassociatePass()
Definition: NaryReassociate.cpp:165

llvm::initializeNaryReassociateLegacyPassPass
void initializeNaryReassociateLegacyPassPass(PassRegistry &)

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::computeOverflowForSignedAdd
OverflowResult computeOverflowForSignedAdd(const WithCache< const Value * > &LHS, const WithCache< const Value * > &RHS, const SimplifyQuery &SQ)
Definition: ValueTracking.cpp:7907

llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition: Local.cpp:561

llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:173

llvm::SCEVTypes
SCEVTypes
Definition: ScalarEvolutionExpressions.h:37

llvm::scUMinExpr
@ scUMinExpr
Definition: ScalarEvolutionExpressions.h:51

llvm::scSMaxExpr
@ scSMaxExpr
Definition: ScalarEvolutionExpressions.h:50

llvm::scUnknown
@ scUnknown
Definition: ScalarEvolutionExpressions.h:55

llvm::scSMinExpr
@ scSMinExpr
Definition: ScalarEvolutionExpressions.h:52

llvm::scUMaxExpr
@ scUMaxExpr
Definition: ScalarEvolutionExpressions.h:49

llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:233

llvm::isKnownNonNegative
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
Definition: ValueTracking.cpp:292

llvm::PatternMatch::MaxMin_match
Definition: PatternMatch.h:2209

llvm::PatternMatch::bind_ty
Definition: PatternMatch.h:807

llvm::SimplifyQuery
Definition: SimplifyQuery.h:70